Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions app/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ repositories {
mavenCentral()
}

sourceSets {
main.resources.srcDir('src/resources')
}

dependencies {
// Use the latest Groovy version for building this library
implementation libs.groovy.all
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,9 @@ class AmpereSpecificationsFetcher extends SpecificationsFetcher {
)

// Load local specifications file as it is only available as .png on the website
DataFrame spec_file = Csv.load(Paths.get('..', 'input_data', 'Ampere_One_Family_Specifications_2025-03-20.csv'))
DataFrame spec_file = Csv.load(
Paths.get(this.class.getResource('/Ampere_One_Family_Specifications_2025-03-20.csv').toURI())
)
specifications = manually_add_processor_specifications(specifications, spec_file)

return specifications
Expand Down
21 changes: 21 additions & 0 deletions app/src/main/groovy/org/cpuInfoFetcher/Main.groovy
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
package org.cpuinfofetcher

import org.cpuinfofetcher.utils.Helpers
import org.cpuinfofetcher.utils.UnitsAdapter
import org.dflib.Exp
import org.dflib.Series
import org.dflib.Printers

import static org.dflib.Exp.*
Comment thread
JosuaCarl marked this conversation as resolved.
import java.time.LocalDateTime


import java.nio.file.Files
import java.util.logging.Logger
Expand Down Expand Up @@ -81,10 +89,13 @@ class Main {
return specifications.rows().selectUnique('name')
}



static void main(String[] args) {
this.days_until_outdated = args.length > 0 ? Integer.parseInt(args[0]) : 28

Files.createDirectories(Paths.get('..', 'specifications_out'))
Files.createDirectories(Paths.get('..', 'nf-co2footprint'))

// Collecting Info
LOGGER.entering('Main', 'main')
Expand All @@ -111,11 +122,21 @@ class Main {
selected_specifications = ua.unitToColumnName(selected_specifications, this.units_mapping)
LOGGER.info('Extracted units from data.')

// adjusts format of tdp values to make them uniform
selected_specifications = ua.extractFirstNumber(selected_specifications)

// add default TDPs
selected_specifications = ProcessSpecificationsTable.computeDefaultTdps(selected_specifications)
LOGGER.info('Added default TDP values.')

Csv.save(selected_specifications, Paths.get('..', 'specifications_out', 'specifications_filtered.csv'))
Csv.save(selected_specifications, Paths.get('..', 'nf-co2footprint', 'CPU_TDP.csv'))

this.selected_specifications = selected_specifications
LOGGER.info('Saved final results.')



LOGGER.exiting('Main', 'main')
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
package org.cpuinfofetcher
import org.cpuinfofetcher.utils.Helpers
import org.dflib.DataFrame
import java.time.LocalDateTime
import static org.dflib.Exp.$col

/**
 * Adds default TDP values to the specifications DataFrame.
 *
 * Groups by 'intended_usage', averages cores, threads and TDP per group, derives an
 * additional "unknown" usage from the mean of the 'local' and 'server' group averages,
 * and appends one "default &lt;usage&gt;" row per group to the specifications.
 *
 * Every default row is written with 'cores' = 1, 'threads' = average threads divided by
 * average cores (0 when the average core count is 0), and 'tdp (W)' = the rounded
 * average TDP of the group.
 *
 * If neither a 'local' nor a 'server' group is present, no "default unknown" row is
 * added (there is nothing to average).
 *
 * @param specifications the input DataFrame; must provide the columns
 *                       'intended_usage', 'cores', 'threads' and 'tdp (W)'
 * @return the updated DataFrame with default TDP values
 */
static DataFrame computeDefaultTdps(DataFrame specifications) {
    DataFrame aggregatedDf = specifications.group('intended_usage').agg(
        $col('intended_usage').first().as('intended_usage'),
        $col('cores').castAsInt().avg().as("avg_cores"),
        $col('threads').castAsInt().avg().as("avg_threads"),
        $col('tdp (W)').castAsDouble().avg().as("avg_tdp"))

    DataFrame local_server_rows = aggregatedDf.rows({ it.get('intended_usage') == 'local' || it.get('intended_usage') == 'server' })
        .select()

    // Guard: summing an empty selection yields null, and dividing by a height of 0
    // would fail, so the "unknown" default is only derived when reference groups exist.
    int referenceGroupCount = local_server_rows.height()
    if (referenceGroupCount > 0) {
        Double unknown_avg_cores = (local_server_rows.sum { it.get('avg_cores') } as Double) / referenceGroupCount
        Double unknown_avg_threads = (local_server_rows.sum { it.get('avg_threads') } as Double) / referenceGroupCount
        Double unknown_avg_tdp = (local_server_rows.sum { it.get('avg_tdp') } as Double) / referenceGroupCount

        aggregatedDf = aggregatedDf.addRow([
            "intended_usage": "unknown",
            "avg_cores": unknown_avg_cores,
            "avg_threads": unknown_avg_threads,
            "avg_tdp": unknown_avg_tdp
        ])
    }

    // One timestamp for all default rows, so they are recognizable as a single batch.
    String timestamp = LocalDateTime.now().toString()

    for (int i = 0; i < aggregatedDf.height(); i++) {
        DataFrame row = aggregatedDf.rows(i).select()
        Double avgThreads = row.get("avg_threads", 0) as Double
        Double avgCores = row.get("avg_cores", 0) as Double
        Double avgtdp = row.get("avg_tdp", 0) as Double
        Double computedThreads = avgCores != 0 ? avgThreads / avgCores : 0.0d
        String intended_usage = row.get("intended_usage", 0)

        // Typed as String so a plain String (not a lazy GString) is stored in the table.
        String defaultLabel = "default $intended_usage"

        specifications = specifications.addRow([
            'product_id': defaultLabel,
            'name': defaultLabel,
            "time": timestamp,
            'source': defaultLabel,
            "intended_usage": defaultLabel,
            'tdp (W)': Helpers.round(avgtdp),
            "cores": 1,
            "threads": Helpers.round(computedThreads)
        ])
    }

    return specifications
}
26 changes: 26 additions & 0 deletions app/src/main/groovy/org/cpuInfoFetcher/utils/Helpers.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package org.cpuinfofetcher.utils

import org.dflib.DataFrame

/**
 * Asserts that two DataFrames are equal.
 *
 * Compares the results of size(), then the column indices, and finally every cell,
 * addressed by column position and row position. Each check is a Groovy assert, so the
 * first mismatch raises an assertion error.
 *
 * @param df1 the first DataFrame; its width and height drive the cell comparison
 * @param df2 the second DataFrame
 * @return true if no assertion failed
 */
static boolean assertEqualDF(DataFrame df1, DataFrame df2) {
    assert df1.size() == df2.size()
    assert df1.getColumnsIndex().toArray() == df2.getColumnsIndex().toArray()

    int columnCount = df1.width()
    int rowCount = df1.height()
    for (int column in 0..<columnCount) {
        for (int row in 0..<rowCount) {
            assert df1.get(column, row) == df2.get(column, row)
        }
    }

    return true
}

/**
 * Rounds a Double value to two decimal places.
 *
 * Values without a meaningful rounded form (null, NaN, positive or negative infinity)
 * are returned unchanged. Math.round would otherwise fail on null, turn NaN into 0 and
 * clamp infinities to the long range.
 *
 * @param value the Double to round; may be null
 * @return the value rounded to two decimal places, or the input itself when it is
 *         null, NaN or infinite
 */
static Double round(Double value) {
    if (value == null || value.isNaN() || value.isInfinite()) {
        return value
    }
    // Scale up, round to the nearest whole number, scale back down.
    return Math.round(value * 100.0d) / 100.0d
}

30 changes: 29 additions & 1 deletion app/src/main/groovy/org/cpuInfoFetcher/utils/UnitsAdapter.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import org.dflib.DataFrame
*/
class UnitsAdapter {

DataFrame unitToColumnName(DataFrame df, Map<String, String[]> unit_mapping) {
static DataFrame unitToColumnName(DataFrame df, Map<String, String[]> unit_mapping) {
List<String> old_col_names = df.getColumnsIndex().toArray()
// Define new column names with units
List<String> new_col_names = []
Expand Down Expand Up @@ -42,4 +42,32 @@ class UnitsAdapter {
return new_df
}

/**
 * Extracts the first numeric value from the 'tdp (W)' column and updates the DataFrame.
 *
 * The number must be at the start of the cell; surrounding whitespace is ignored.
 * Cells that are null or do not start with a number become null.
 *
 * Examples of extraction:
 * - "15-30" --> 15
 * - "1.5/20" --> 1.5
 * - "3.1--6" --> 3.1
 *
 * @param df the input DataFrame; must contain a 'tdp (W)' column
 * @return a DataFrame with the updated 'tdp (W)' column containing only the first numeric value
 */
static DataFrame extractFirstNumber(DataFrame df) {
    DataFrame old_df = df.cols().selectAs(Map.of("tdp (W)", "tdp old"))

    // Integer or decimal at the start of the text; compiled once instead of once per row.
    def leadingNumber = ~/^[0-9]*\.?[0-9]+/

    // Collect all values first: appending row by row to an immutable DataFrame
    // would copy the growing frame on every iteration.
    List<Double> extracted = []
    for (int i = 0; i < old_df.height(); i++) {
        Object raw = old_df.get("tdp old", i)
        String tdp_value = raw == null ? '' : raw.toString().trim()
        def matcher = leadingNumber.matcher(tdp_value)
        Double new_tdp_value = matcher.find() ? Double.parseDouble(matcher.group(0)) : null
        extracted.add(new_tdp_value)
    }

    DataFrame new_df
    if (extracted.isEmpty()) {
        new_df = DataFrame.empty("tdp (W)")
    } else {
        new_df = DataFrame.foldByColumn("tdp (W)").of(extracted.toArray())
    }

    return old_df.hConcat(new_df).colsExcept('tdp old').select()
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ class AMDSpecificationsFetcherTest extends Specification {
DataFrame specifications = Csv.load(this.tempPath.resolve('AMD_processor_specifications.csv'))

then:
specifications.getColumnsIndex().toArray() == [
specifications.getColumnsIndex().toList() == [
'time', 'source', 'intended_usage', 'name', 'Family', 'Series', 'Form Factor', '# of CPU Cores', '# of Threads',
'Max. Boost Clock', 'Base Clock', 'L2 Cache', 'L3 Cache', 'Default TDP', 'L1 Cache',
'AMD Configurable TDP (cTDP)', 'Processor Technology for CPU Cores', 'Unlocked for Overclocking',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class AmpereSpecificationsFetcherTest extends Specification {
DataFrame specifications = Csv.load(this.tempPath.resolve('Ampere_cpu_specifications.csv'))

then:
specifications.getColumnsIndex().toArray() == [
specifications.getColumnsIndex().toList() == [
'time', 'source' , 'intended_usage', 'name', 'CORES', 'SUSTAINED FREQUENCY (GHz)', 'USAGE POWER (W)', 'product_id'
]
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class CPUSpecificationsSummarizerTest extends Specification {
private final CPUSpecificationsSummarizer summarizer = new CPUSpecificationsSummarizer()

private final DataFrame exampleDF = DataFrame.foldByRow('A', 'B').of('1', '2')
private final Map<String, String[]> aliases = ['A': ['A'], 'C': ['X', 'B'], 'B': ['C']]
private final Map<String, String[]> aliases = ['A': ['A'], 'C': ['X', 'B'], 'B': ['C']] as Map<String, String[]>

boolean assertEqualDF(DataFrame df1, DataFrame df2) {
assert df1.size() == df2.size()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ class IntelSpecificationsFetcherTest extends Specification {
DataFrame specifications = Csv.load(this.tempPath.resolve('Intel_family_info.csv'))

then:
specifications.getColumnsIndex().toArray() == [
specifications.getColumnsIndex().toList() == [
'product_id', 'name', 'time', 'source', 'intended_usage', 'url'
]
}
Expand Down Expand Up @@ -94,7 +94,7 @@ class IntelSpecificationsFetcherTest extends Specification {
DataFrame specifications = Csv.load(this.tempPath.resolve('Intel_Processor_Useries.csv'))

then:
specifications.getColumnsIndex().toArray() == [
specifications.getColumnsIndex().toList() == [
'product_id', 'name', 'time', 'source', 'intended_usage', 'url'
]
}
Expand Down Expand Up @@ -130,7 +130,7 @@ class IntelSpecificationsFetcherTest extends Specification {
DataFrame specifications = Csv.load(this.tempPath.resolve('some_name.csv'))

then:
specifications.getColumnsIndex().toArray() == [
specifications.getColumnsIndex().toList() == [
'product_id', 'name', 'time', 'source', 'intended_usage', 'Product Collection', 'Code Name', 'Vertical Segment',
'Processor Number', 'Lithography', 'Total Cores', 'Processor Base Frequency', 'Cache', 'Bus Speed',
'FSB Parity', 'TDP', 'VID Voltage Range', 'Marketing Status', 'Launch Date', 'Servicing Status',
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package org.cpuinfofetcher

import org.cpuinfofetcher.utils.Helpers
import org.dflib.DataFrame
import spock.lang.Specification

/**
 * Spock specification for ProcessSpecificationsTable.
 */
class ProcessSpecificationsTableTest extends Specification {

    /**
     * Feeds two rows each for 'local', 'server' and 'embedded' into computeDefaultTdps
     * and expects four additional "default ..." rows (one per usage plus "unknown").
     */
    def 'check default tdp computation'() {
        given: 'a column-wise folded table and the table expected after adding defaults'
        DataFrame specifications = DataFrame
            .foldByColumn("intended_usage", "cores", "threads", "tdp (W)")
            .of("local", "server", "embedded", "local", "server", "embedded",
                2, 8, 4, 4, 16, 4,
                2, 16, 8, 4, 32, 8,
                15.0, 15.0, 1.5, 15.0, 10.0, 1.5)
        DataFrame expectedTable = DataFrame
            .foldByColumn("intended_usage", "cores", "threads", "tdp (W)")
            .of("local", "server", "embedded", "local", "server", "embedded", "default local", "default server", "default embedded", "default unknown",
                2, 8, 4, 4, 16, 4, 1, 1, 1, 1,
                2, 16, 8, 4, 32, 8, 1, 2.0, 2.0, 1.8,
                15.0, 15.0, 1.5, 15.0, 10.0, 1.5, 15.0, 12.5, 1.5, 13.75)

        when: 'the defaults are computed'
        DataFrame actualTable = ProcessSpecificationsTable.computeDefaultTdps(specifications)

        then: 'the result equals the expected table cell by cell'
        Helpers.assertEqualDF(expectedTable, actualTable)
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import spock.lang.Specification
* @version 1.0
* @since 1.0
*/
public class SpecificationsFetcherTest extends Specification {
class SpecificationsFetcherTest extends Specification {

private final SpecificationsFetcher sf = new SpecificationsFetcher()
private final Path tempPath = Files.createTempFile('file', '.tmp')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,6 @@ class UnitsAdapterTest extends Specification {
private final DataFrame exampleDF = DataFrame.foldByRow('A', 'B', 'C').of('1 W', '2 Beta', '3')
private final Map<String, List<String>> units = ['A': ['W', 'Watt'], 'C': ['C', 'Coloumb'], 'X': ['I', 'Imaginary']]

boolean assertEqualDF(DataFrame df1, DataFrame df2) {
assert df1.size() == df2.size()
assert df1.getColumnsIndex().toArray() == df2.getColumnsIndex().toArray()
for (int i = 0; i < df1.width(); i++) {
for (int j = 0; j < df1.height(); j++) {
assert df1.get(i, j) == df2.get(i, j)
}
}
return true
}

def 'check extraction of units'() {
setup:
Expand All @@ -33,7 +23,24 @@ class UnitsAdapterTest extends Specification {
DataFrame result = this.ua.unitToColumnName(this.exampleDF, this.units)

then:
assertEqualDF(result, expected)
Helpers.assertEqualDF(result, expected)
}

/**
 * Checks that extractFirstNumber replaces each 'tdp (W)' text with its leading number
 * and leaves the other column untouched.
 */
def 'check tdp value extraction'() {
    given: 'a table with textual TDP values and the table expected after extraction'
    DataFrame rawTable = DataFrame
        .foldByColumn("test_col", "tdp (W)")
        .of("test_val_1", "test_val_2", "test_val_3", "test_val_4", "15-30", "15-30", "1.5/10", "2.3--5")
    DataFrame expectedTable = DataFrame
        .foldByColumn("test_col", "tdp (W)")
        .of("test_val_1", "test_val_2", "test_val_3", "test_val_4", 15.0, 15.0, 1.5, 2.3)

    when: 'the first number of every TDP value is extracted'
    DataFrame actualTable = this.ua.extractFirstNumber(rawTable)

    then: 'the result equals the expected table cell by cell'
    Helpers.assertEqualDF(expectedTable, actualTable)
}

}
Loading