qbic-projects · nadnein · Mar 26, 2025 · Mar 25, 2025 · Mar 25, 2025 · Mar 26, 2025
diff --git a/app/build.gradle b/app/build.gradle
@@ -20,6 +20,10 @@ repositories {
     mavenCentral()
 }
 
+sourceSets {
+    main.resources.srcDir('src/resources') // adds src/resources as a further resource directory of the main source set
+}
+
 dependencies {
     // Use the latest Groovy version for building this library
     implementation libs.groovy.all

diff --git a/app/src/main/groovy/org/cpuInfoFetcher/AmpereSpecificationsFetcher.groovy b/app/src/main/groovy/org/cpuInfoFetcher/AmpereSpecificationsFetcher.groovy
@@ -81,7 +81,9 @@ class AmpereSpecificationsFetcher extends SpecificationsFetcher {
         )
 
         // Load local specifications file as it is only available as .png on the website 
-        DataFrame spec_file = Csv.load(Paths.get('..', 'input_data', 'Ampere_One_Family_Specifications_2025-03-20.csv'))
+        DataFrame spec_file = Csv.load(
+                Paths.get(this.class.getResource('/Ampere_One_Family_Specifications_2025-03-20.csv').toURI())
+        )
         specifications = manually_add_processor_specifications(specifications, spec_file)
 
         return specifications

diff --git a/app/src/main/groovy/org/cpuInfoFetcher/Main.groovy b/app/src/main/groovy/org/cpuInfoFetcher/Main.groovy
@@ -1,6 +1,14 @@
 package org.cpuinfofetcher
 
+import org.cpuinfofetcher.utils.Helpers
 import org.cpuinfofetcher.utils.UnitsAdapter
+import org.dflib.Exp
+import org.dflib.Series
+import org.dflib.Printers
+
+import static org.dflib.Exp.*
+import java.time.LocalDateTime
+
 
 import java.nio.file.Files
 import java.util.logging.Logger
@@ -81,10 +89,13 @@ class Main {
         return specifications.rows().selectUnique('name')
     }
 
+
+
     static void main(String[] args) {
         this.days_until_outdated = args.length > 0 ? Integer.parseInt(args[0]) : 28
 
         Files.createDirectories(Paths.get('..', 'specifications_out'))
+        Files.createDirectories(Paths.get('..', 'nf-co2footprint'))
 
         // Collecting Info
         LOGGER.entering('Main', 'main')
@@ -111,11 +122,21 @@ class Main {
         selected_specifications = ua.unitToColumnName(selected_specifications, this.units_mapping)
         LOGGER.info('Extracted units from data.')
 
+        // adjusts format of tdp values to make them uniform
+        selected_specifications = ua.extractFirstNumber(selected_specifications)
+
+        // add default TDPs
+        selected_specifications = ProcessSpecificationsTable.computeDefaultTdps(selected_specifications)
+        LOGGER.info('Added default TDP values.')
+
         Csv.save(selected_specifications, Paths.get('..', 'specifications_out', 'specifications_filtered.csv'))
+        Csv.save(selected_specifications, Paths.get('..', 'nf-co2footprint', 'CPU_TDP.csv'))
+
         this.selected_specifications = selected_specifications
         LOGGER.info('Saved final results.')
 
 
+
         LOGGER.exiting('Main', 'main')
     }
 

diff --git a/app/src/main/groovy/org/cpuInfoFetcher/ProcessSpecificationsTable.groovy b/app/src/main/groovy/org/cpuInfoFetcher/ProcessSpecificationsTable.groovy
@@ -0,0 +1,58 @@
+package org.cpuinfofetcher
+import org.cpuinfofetcher.utils.Helpers
+import org.dflib.DataFrame
+import java.time.LocalDateTime
+import static org.dflib.Exp.$col
+
+/**
+ * Appends one "default [usage]" row per intended usage to the specifications DataFrame.
+ *
+ * The rows are grouped by 'intended_usage' and the averages of 'cores', 'threads' and
+ * 'tdp (W)' are computed per group. An extra "unknown" group is derived as the mean of the
+ * 'local' and 'server' group averages; it is left out when neither of these groups is
+ * present, because there is nothing to average in that case.
+ *
+ * Every appended row has 'cores' set to 1, 'threads' set to the group's average threads
+ * divided by its average cores, and 'tdp (W)' set to the group's average TDP (the latter two
+ * rounded to two decimal places).
+ * NOTE(review): 'tdp (W)' is not divided by the average core count although 'cores' is 1 -
+ * confirm that this is the intended default.
+ *
+ * @param specifications the input DataFrame; the columns 'intended_usage', 'cores',
+ *                       'threads' and 'tdp (W)' are read
+ * @return the input DataFrame with the default rows appended
+ */
+static DataFrame computeDefaultTdps(DataFrame specifications) {
+    DataFrame aggregatedDf = specifications.group('intended_usage').agg(
+            $col('intended_usage').first().as('intended_usage'),
+            $col('cores').castAsInt().avg().as("avg_cores"),
+            $col('threads').castAsInt().avg().as("avg_threads"),
+            $col('tdp (W)').castAsDouble().avg().as("avg_tdp"))
+
+    DataFrame local_server_rows = aggregatedDf.rows({ it.get('intended_usage') == 'local' || it.get('intended_usage') == 'server' })
+            .select()
+
+    // Without a 'local' or 'server' group the sums are null and the divisor is 0,
+    // so the "unknown" group can only be derived when at least one of them exists.
+    int basis = local_server_rows.height()
+    if (basis > 0) {
+        Double unknown_avg_cores = (local_server_rows.sum { it.get('avg_cores') } as Double) / basis
+        Double unknown_avg_threads = (local_server_rows.sum { it.get('avg_threads') } as Double) / basis
+        Double unknown_avg_tdp = (local_server_rows.sum { it.get('avg_tdp') } as Double) / basis
+
+        aggregatedDf = aggregatedDf.addRow([
+                "intended_usage": "unknown",
+                "avg_cores": unknown_avg_cores,
+                "avg_threads": unknown_avg_threads,
+                "avg_tdp": unknown_avg_tdp
+        ])
+    }
+
+    // One timestamp shared by all default rows of this run
+    String timestamp = LocalDateTime.now().toString()
+
+    for (int i = 0; i < aggregatedDf.height(); i++) {
+        DataFrame row = aggregatedDf.rows(i).select()
+        Double avgThreads = row.get("avg_threads", 0) as Double
+        Double avgCores = row.get("avg_cores", 0) as Double
+        Double avgtdp = row.get("avg_tdp", 0) as Double
+        // Guard against a division by zero for groups without cores
+        Double computedThreads = avgCores != 0 ? avgThreads / avgCores : 0
+        String intended_usage = row.get("intended_usage", 0)
+        // Stored as a plain String: a GString differs from String in equals()/hashCode(),
+        // which would break value-based operations on the DataFrame.
+        String label = "default $intended_usage".toString()
+        specifications = specifications.addRow([
+                'product_id': label,
+                'name': label,
+                "time": timestamp,
+                'source': label,
+                "intended_usage": label,
+                'tdp (W)': Helpers.round(avgtdp),
+                "cores": 1,
+                "threads": Helpers.round(computedThreads)
+        ])
+    }
+
+    return specifications
+}
diff --git a/app/src/main/groovy/org/cpuInfoFetcher/utils/Helpers.groovy b/app/src/main/groovy/org/cpuInfoFetcher/utils/Helpers.groovy
@@ -0,0 +1,26 @@
+package org.cpuinfofetcher.utils
+
+import org.dflib.DataFrame
+
+/**
+ * Asserts that two DataFrames agree in size(), column labels and every cell value.
+ *
+ * @param df1 the first DataFrame; its width and height define the cells that are compared
+ * @param df2 the DataFrame to compare against
+ * @return true when no assertion failed
+ */
+static boolean assertEqualDF(DataFrame df1, DataFrame df2) {
+    assert df1.size() == df2.size()
+    assert df1.getColumnsIndex().toArray() == df2.getColumnsIndex().toArray()
+
+    // Walk the cells column by column
+    int columnCount = df1.width()
+    int rowCount = df1.height()
+    for (int column = 0; column < columnCount; column++) {
+        for (int row = 0; row < rowCount; row++) {
+            assert df1.get(column, row) == df2.get(column, row)
+        }
+    }
+    true
+}
+
+/**
+ * Rounds a Double value to two decimal places.
+ *
+ * The value is scaled by 100, rounded to the nearest whole number with Math.round
+ * and scaled back again.
+ *
+ * @param value the Double to round
+ * @return the value rounded to two decimal places
+ */
+static Double round(Double value) {
+    long hundredths = Math.round(value * 100.0)
+    return (hundredths / 100.0) as Double
+}
+
diff --git a/app/src/main/groovy/org/cpuInfoFetcher/utils/UnitsAdapter.groovy b/app/src/main/groovy/org/cpuInfoFetcher/utils/UnitsAdapter.groovy
@@ -7,7 +7,7 @@ import org.dflib.DataFrame
  */
 class UnitsAdapter {
 
-    DataFrame unitToColumnName(DataFrame df, Map<String, String[]> unit_mapping) {
+    static DataFrame unitToColumnName(DataFrame df, Map<String, String[]> unit_mapping) {
         List<String> old_col_names = df.getColumnsIndex().toArray()
         // Define new column names with units
         List<String> new_col_names = []
@@ -42,4 +42,32 @@ class UnitsAdapter {
         return new_df
     }
 
+    /**
+     * Replaces the 'tdp (W)' column by the leading number of each of its values.
+     *
+     * Only a number at the very start of the value is recognised; values without one
+     * (including null) become null.
+     *
+     * Examples of extraction:
+     * - "15-30"   --> 15
+     * - "1.5/20"  --> 1.5
+     * - "3.1--6"  --> 3.1
+     *
+     * @param df the input DataFrame containing a 'tdp (W)' column
+     * @return a DataFrame whose 'tdp (W)' column holds the extracted numbers as Double;
+     *         the rebuilt column is concatenated after the remaining columns
+     */
+    static DataFrame extractFirstNumber(DataFrame df) {
+        // Integer or decimal number anchored at the start of the value
+        def leadingNumber = ~/^[0-9]*\.?[0-9]+/
+
+        // Keep the raw values under a temporary name while the numeric column is built
+        DataFrame renamed = df.cols().selectAs(Map.of("tdp (W)", "tdp old"))
+        def numeric = DataFrame.empty("tdp (W)")
+
+        for (int rowIndex = 0; rowIndex < renamed.height(); rowIndex++) {
+            String rawTdp = renamed.get("tdp old", rowIndex)
+            Double parsedTdp = null
+            if (rawTdp != null) {
+                def match = leadingNumber.matcher(rawTdp)
+                if (match.find()) {
+                    parsedTdp = Double.parseDouble(match.group(0))
+                }
+            }
+            numeric = numeric.addRow(["tdp (W)": parsedTdp])
+        }
+
+        return renamed.hConcat(numeric).colsExcept('tdp old').select()
+    }
+
 }
diff --git a/..._One_Family_Specifications_2025-03-20.csv → ..._One_Family_Specifications_2025-03-20.csv b/..._One_Family_Specifications_2025-03-20.csv → ..._One_Family_Specifications_2025-03-20.csv
diff --git a/...tcher/AMDSpecificationsFetcherTest.groovy → ...tcher/AMDSpecificationsFetcherTest.groovy b/...tcher/AMDSpecificationsFetcherTest.groovy → ...tcher/AMDSpecificationsFetcherTest.groovy
@@ -58,7 +58,7 @@ class AMDSpecificationsFetcherTest extends Specification {
             DataFrame specifications = Csv.load(this.tempPath.resolve('AMD_processor_specifications.csv'))
 
         then:
-            specifications.getColumnsIndex().toArray() == [
+            specifications.getColumnsIndex().toList() == [
                 'time', 'source', 'intended_usage', 'name', 'Family', 'Series', 'Form Factor', '# of CPU Cores', '# of Threads',
                 'Max. Boost Clock', 'Base Clock', 'L2 Cache', 'L3 Cache', 'Default TDP', 'L1 Cache',
                 'AMD Configurable TDP (cTDP)', 'Processor Technology for CPU Cores', 'Unlocked for Overclocking',

diff --git a/...er/AmpereSpecificationsFetcherTest.groovy → ...er/AmpereSpecificationsFetcherTest.groovy b/...er/AmpereSpecificationsFetcherTest.groovy → ...er/AmpereSpecificationsFetcherTest.groovy
@@ -52,7 +52,7 @@ class AmpereSpecificationsFetcherTest extends Specification {
             DataFrame specifications = Csv.load(this.tempPath.resolve('Ampere_cpu_specifications.csv'))
 
         then:
-            specifications.getColumnsIndex().toArray() == [
+            specifications.getColumnsIndex().toList() == [
                 'time', 'source' , 'intended_usage', 'name', 'CORES', 'SUSTAINED FREQUENCY (GHz)', 'USAGE POWER (W)', 'product_id'
             ]
     }

diff --git a/...er/CPUSpecificationsSummarizerTest.groovy → ...er/CPUSpecificationsSummarizerTest.groovy b/...er/CPUSpecificationsSummarizerTest.groovy → ...er/CPUSpecificationsSummarizerTest.groovy
@@ -18,7 +18,7 @@ class CPUSpecificationsSummarizerTest extends Specification {
     private final CPUSpecificationsSummarizer summarizer = new CPUSpecificationsSummarizer()
 
     private final DataFrame exampleDF = DataFrame.foldByRow('A', 'B').of('1', '2')
-    private final Map<String, String[]> aliases = ['A': ['A'], 'C': ['X', 'B'], 'B': ['C']]
+    private final Map<String, String[]> aliases = ['A': ['A'], 'C': ['X', 'B'], 'B': ['C']] as Map<String, String[]>
 
     boolean assertEqualDF(DataFrame df1, DataFrame df2) {
         assert df1.size() == df2.size()

diff --git a/...her/IntelSpecificationsFetcherTest.groovy → ...her/IntelSpecificationsFetcherTest.groovy b/...her/IntelSpecificationsFetcherTest.groovy → ...her/IntelSpecificationsFetcherTest.groovy
@@ -59,7 +59,7 @@ class IntelSpecificationsFetcherTest extends Specification  {
             DataFrame specifications = Csv.load(this.tempPath.resolve('Intel_family_info.csv'))
 
         then:
-            specifications.getColumnsIndex().toArray() == [
+            specifications.getColumnsIndex().toList() == [
                 'product_id', 'name', 'time', 'source', 'intended_usage', 'url'
             ]
     }
@@ -94,7 +94,7 @@ class IntelSpecificationsFetcherTest extends Specification  {
             DataFrame specifications = Csv.load(this.tempPath.resolve('Intel_Processor_Useries.csv'))
 
         then:
-            specifications.getColumnsIndex().toArray() == [
+            specifications.getColumnsIndex().toList() == [
                 'product_id', 'name', 'time', 'source', 'intended_usage', 'url'
             ]
     }
@@ -130,7 +130,7 @@ class IntelSpecificationsFetcherTest extends Specification  {
             DataFrame specifications = Csv.load(this.tempPath.resolve('some_name.csv'))
 
         then:
-            specifications.getColumnsIndex().toArray() == [
+            specifications.getColumnsIndex().toList() == [
                 'product_id', 'name', 'time', 'source', 'intended_usage', 'Product Collection', 'Code Name', 'Vertical Segment',
                 'Processor Number', 'Lithography', 'Total Cores', 'Processor Base Frequency', 'Cache', 'Bus Speed',
                 'FSB Parity', 'TDP', 'VID Voltage Range', 'Marketing Status', 'Launch Date', 'Servicing Status',

diff --git a/app/src/test/groovy/org/cpuinfofetcher/ProcessSpecificationsTableTest.groovy b/app/src/test/groovy/org/cpuinfofetcher/ProcessSpecificationsTableTest.groovy
@@ -0,0 +1,31 @@
+package org.cpuinfofetcher
+
+import org.cpuinfofetcher.utils.Helpers
+import org.dflib.DataFrame
+import spock.lang.Specification
+
+/**
+ * Checks the default rows that ProcessSpecificationsTable.computeDefaultTdps appends.
+ */
+class ProcessSpecificationsTableTest extends Specification {
+
+    def 'check default tdp computation'() {
+        given: 'two processors for each of the usages local, server and embedded'
+        DataFrame specifications = DataFrame
+                .foldByColumn("intended_usage", "cores", "threads", "tdp (W)")
+                .of(
+                        // intended_usage
+                        "local", "server", "embedded", "local", "server", "embedded",
+                        // cores
+                        2, 8, 4, 4, 16, 4,
+                        // threads
+                        2, 16, 8, 4, 32, 8,
+                        // tdp (W)
+                        15.0, 15.0, 1.5, 15.0, 10.0, 1.5)
+
+        and: 'the same rows followed by one default row per usage plus "unknown"'
+        DataFrame expectedWithDefaults = DataFrame
+                .foldByColumn("intended_usage", "cores", "threads", "tdp (W)")
+                .of(
+                        // intended_usage
+                        "local", "server", "embedded", "local", "server", "embedded",
+                        "default local", "default server", "default embedded", "default unknown",
+                        // cores: every default row uses 1
+                        2, 8, 4, 4, 16, 4, 1, 1, 1, 1,
+                        // threads: average threads / average cores (3/3, 24/12, 8/4, 13.5/7.5)
+                        2, 16, 8, 4, 32, 8, 1, 2.0, 2.0, 1.8,
+                        // tdp (W): group averages; unknown is the mean of local (15.0) and server (12.5)
+                        15.0, 15.0, 1.5, 15.0, 10.0, 1.5, 15.0, 12.5, 1.5, 13.75)
+
+        when:
+        DataFrame result = ProcessSpecificationsTable.computeDefaultTdps(specifications)
+
+        then:
+        Helpers.assertEqualDF(expectedWithDefaults, result)
+    }
+}
diff --git a/...oFetcher/SpecificationsFetcherTest.groovy → ...ofetcher/SpecificationsFetcherTest.groovy b/...oFetcher/SpecificationsFetcherTest.groovy → ...ofetcher/SpecificationsFetcherTest.groovy
@@ -22,7 +22,7 @@ import spock.lang.Specification
  * @version 1.0
  * @since 1.0
  */
-public class SpecificationsFetcherTest extends Specification {
+class SpecificationsFetcherTest extends Specification {
 
     private final SpecificationsFetcher sf = new SpecificationsFetcher()
     private final Path tempPath = Files.createTempFile('file', '.tmp')

diff --git a/...InfoFetcher/utils/UnitsAdapterTest.groovy → ...infofetcher/utils/UnitsAdapterTest.groovy b/...InfoFetcher/utils/UnitsAdapterTest.groovy → ...infofetcher/utils/UnitsAdapterTest.groovy
@@ -15,16 +15,6 @@ class UnitsAdapterTest extends Specification {
     private final DataFrame exampleDF = DataFrame.foldByRow('A', 'B', 'C').of('1 W', '2 Beta', '3')
     private final Map<String, List<String>> units = ['A': ['W', 'Watt'], 'C': ['C', 'Coloumb'], 'X': ['I', 'Imaginary']]
 
-    boolean assertEqualDF(DataFrame df1, DataFrame df2) {
-        assert df1.size() == df2.size()
-        assert df1.getColumnsIndex().toArray() == df2.getColumnsIndex().toArray()
-        for (int i = 0; i < df1.width(); i++) {
-            for (int j = 0; j < df1.height(); j++) {
-                assert df1.get(i, j) == df2.get(i, j)
-            }
-        }
-        return true
-    }
 
     def 'check extraction of units'() {
         setup:
@@ -33,7 +23,24 @@ class UnitsAdapterTest extends Specification {
             DataFrame result = this.ua.unitToColumnName(this.exampleDF, this.units)
 
         then:
-            assertEqualDF(result, expected)
+            Helpers.assertEqualDF(result, expected)
+    }
+
+    // The leading number of every 'tdp (W)' value must be extracted as a Double,
+    // while the other column stays untouched.
+    def 'check tdp value extraction'() {
+        given:
+            DataFrame rawSpecifications = DataFrame
+                    .foldByColumn("test_col", "tdp (W)")
+                    .of(
+                            // test_col
+                            "test_val_1", "test_val_2", "test_val_3", "test_val_4",
+                            // tdp (W): ranges and alternatives as delivered by the sources
+                            "15-30", "15-30", "1.5/10", "2.3--5")
+            DataFrame expectedSpecifications = DataFrame
+                    .foldByColumn("test_col", "tdp (W)")
+                    .of(
+                            // test_col
+                            "test_val_1", "test_val_2", "test_val_3", "test_val_4",
+                            // tdp (W): only the first number of each value remains
+                            15.0, 15.0, 1.5, 2.3)
+
+        when:
+            DataFrame extracted = this.ua.extractFirstNumber(rawSpecifications)
+
+        then:
+            Helpers.assertEqualDF(expectedSpecifications, extracted)
+
     }
 
 }
diff --git a/...uInfoFetcher/utils/WebrequestsTest.groovy → ...uinfofetcher/utils/WebrequestsTest.groovy b/...uInfoFetcher/utils/WebrequestsTest.groovy → ...uinfofetcher/utils/WebrequestsTest.groovy