11package org.cpuinfofetcher
22import org.cpuinfofetcher.utils.Helpers
33import org.dflib.DataFrame
4+ import org.dflib.Printers
45import java.time.LocalDateTime
56import static org.dflib.Exp.$col
7+ import java.time.Year
8+
/**
 * Adapts columns with units to be more uniform.
 */
class UnitsAdapter {

    /**
     * Moves units out of the cell values and into the column names.
     *
     * For every column that has a non-empty entry in {@code unit_mapping}, the first unit of that
     * entry is appended to the column name as " (unit)" unless the name already ends with it, and
     * every listed unit spelling as well as all spaces are removed from the column's values.
     * Columns without an entry keep their name. All values are passed through as strings;
     * null cells stay null.
     *
     * @param df            the input DataFrame
     * @param unit_mapping  column name -> accepted unit spellings; the first spelling is the one
     *                      written into the new column name
     * @return a new DataFrame with the renamed columns and unit-free values
     */
    static DataFrame unitToColumnName(DataFrame df, Map<String, String[]> unit_mapping) {
        List<String> old_col_names = df.getColumnsIndex().toArray() as List<String>

        // Resolve the unit spellings once per column instead of once per cell
        List units_per_column = old_col_names.collect { String col_name -> unit_mapping.get(col_name) }

        // Define new column names with units
        List<String> new_col_names = []
        old_col_names.eachWithIndex { String col_name, int j ->
            def units = units_per_column[j]
            if (!units) {
                // No mapping (or an empty one): nothing to append
                new_col_names.add(col_name)
                return
            }
            String suffix = " (${units[0]})"
            new_col_names.add(col_name.endsWith(suffix) ? col_name : col_name + suffix)
        }

        // Extract units from values
        def appender = DataFrame.byArrayRow(new_col_names as String[]).appender()
        for (int i = 0; i < df.height(); i++) {
            List row = []
            for (int j = 0; j < df.width(); j++) {
                def units = units_per_column[j]
                String value = df.get(j, i)
                if (units && value != null) {
                    // Longest spelling first, so that e.g. "W" cannot cut "Watt" down to "att"
                    List<String> by_length = (units as List<String>).sort(false) { a, b -> b.length() <=> a.length() }
                    for (String unit : by_length) {
                        // Literal replacement: a unit is plain text, not a regular expression
                        value = value.replace(unit, '')
                    }
                    value = value.replace(' ', '')
                }
                row.add(value)
            }

            // Pass an explicit array: spreading a single null cell would hand a null varargs array to append()
            appender.append(row as Object[])
        }

        return appender.toDataFrame()
    }

}
50+
/**
 * Extracts the first numeric value from the 'tdp (W)' column and updates the DataFrame.
 *
 * Only a number at the very start of the (trimmed) value is recognised.
 * Examples of extraction:
 * - "15-30"  --> 15
 * - "1.5/20" --> 1.5
 * - "3.1--6" --> 3.1
 * Values that are null or do not start with a number become null.
 *
 * @param df the input DataFrame; must contain a 'tdp (W)' column
 * @return a DataFrame whose 'tdp (W)' column contains only the first numeric value as a Double;
 *         the parsed column is concatenated after the remaining columns
 */
static DataFrame extractFirstNumber(DataFrame df) {
    // Park the raw column under a temporary name so the parsed column can reuse 'tdp (W)'
    DataFrame old_df = df.cols().selectAs(Map.of("tdp (W)", "tdp old"))
    def raw_tdps = old_df.getColumn("tdp old")

    // Collect the parsed values through an appender; addRow() would copy the frame on every row
    def parsed = DataFrame.byArrayRow("tdp (W)").appender()
    for (int i = 0; i < old_df.height(); i++) {
        String tdp_value = raw_tdps.get(i)
        // Regex to match the first number (integer or decimal) at the start of the value
        def matcher = (tdp_value ?: '').trim() =~ /^[0-9]*\.?[0-9]+/
        Double new_tdp_value = matcher.find() ? Double.parseDouble(matcher.group(0)) : null
        // Explicit array: a lone null argument would otherwise be taken as a null varargs array
        parsed.append([new_tdp_value] as Object[])
    }

    return old_df.hConcat(parsed.toDataFrame()).colsExcept('tdp old').select()
}
78+
79+
/**
 * Drops repeated entries, comparing rows by their 'name' column only.
 *
 * @param specifications the DataFrame to deduplicate
 * @return the rows of {@code specifications} that are unique with respect to 'name'
 */
static DataFrame removeDuplicates(DataFrame specifications) {
    def all_rows = specifications.rows()
    DataFrame unique_by_name = all_rows.selectUnique('name')
    return unique_by_name
}
90+
91+
692
793/**
894 * Adds default TDP values to the specifications DataFrame.
@@ -14,7 +100,14 @@ import static org.dflib.Exp.$col
14100 * @return the updated DataFrame with default TDP values
15101 */
16102static DataFrame computeDefaultTdps (DataFrame specifications ) {
17- DataFrame aggregatedDf = specifications. group(' intended_usage' ). agg(
103+ int currentYear = Year . now(). getValue()
104+
105+ DataFrame specifications_filtered = specifications. rows({ row ->
106+ def year = row. get(" Launch Year/Last Time Buy" ) as Integer
107+ return year != null && year >= currentYear - 10
108+ }). select()
109+
110+ DataFrame aggregatedDf = specifications_filtered. group(' intended_usage' ). agg(
18111 $col(' intended_usage' ). first(). as(' intended_usage' ),
19112 $col(' cores' ). castAsInt(). avg(). as(" avg_cores" ),
20113 $col(' threads' ). castAsInt(). avg(). as(" avg_threads" ),
@@ -55,4 +148,91 @@ static DataFrame computeDefaultTdps(DataFrame specifications) {
55148 }
56149
57150 return specifications
58- }
151+ }
152+
153+
154+
/**
 * Appends a 'Launch Year/Last Time Buy' column holding one integer year per row.
 *
 * The year is parsed from 'Launch Date'; when that yields no year, 'Last Time Buy' is tried.
 * Recognised layouts (after trimming): "2023", "3/22/22", "03/15/2021", "Q2'22", "Q12021",
 * "Q217", "Q2 2026", "3Q 2016", "June 2017", "06/2017", "2Q18", "04'16". Any other text yields
 * the smallest standalone four-digit number it contains, or null when there is none.
 * Two-digit years are read as 20yy unless that would lie more than 20 years after the current
 * year, in which case they are read as 19yy.
 *
 * A warning is printed for each row that has text in either source column but no parsable year.
 *
 * @param df DataFrame containing the columns 'name', 'Launch Date' and 'Last Time Buy'
 * @return df with the additional 'Launch Year/Last Time Buy' column; the source columns are kept
 */
static DataFrame extractUniformYearColumn(DataFrame df) {
    // Latest year a two-digit value may expand to as 20yy; beyond that it is taken as 19yy
    int latestPlausibleYear = Year.now().getValue() + 20

    // Expands "22" to 2022 (or "99" to 1999); longer digit strings are parsed as they are
    def toFullYear = { String digits ->
        int parsed = digits.toInteger()
        if (digits.length() != 2) {
            return parsed
        }
        return 2000 + parsed <= latestPlausibleYear ? 2000 + parsed : 1900 + parsed
    }

    // Whole-value layouts, tried in order; group 1 is always the year part
    def yearPatterns = [
            ~/(\d{4})/,                        // "2023"
            ~/\d{1,2}\/\d{1,2}\/(\d{2,4})/,    // "3/22/22" or "03/15/2021"
            ~/Q[1-4]'(\d{2})/,                 // "Q2'22"
            ~/Q[1-4](\d{4})/,                  // "Q12021"
            ~/Q[1-4]\s?(\d{2,4})/,             // "Q217" or "Q2 2026"
            ~/[1-4]Q\s(\d{4})/,                // "3Q 2016"
            ~/[A-Za-z]+\s(\d{4})/,             // "June 2017"
            ~/\d{1,2}\/(\d{4})/,               // "06/2017"
            ~/[1-4]Q(\d{2})/,                  // "2Q18"
            ~/\d{2}'(\d{2})/                   // "04'16"
    ]

    // Returns the year found in a single cell value, or null
    def extractYear = { value ->
        String text = value?.toString()?.trim()
        if (!text) {
            return null // Handle null or empty input
        }
        for (pattern in yearPatterns) {
            def matcher = pattern.matcher(text)
            if (matcher.matches()) {
                return toFullYear(matcher.group(1))
            }
        }
        // Free text: take the earliest of all standalone four-digit numbers
        def allYears = (text =~ /\b\d{4}\b/).collect { it.toInteger() }
        return allYears ? allYears.min() : null
    }

    def hasText = { value -> value != null && !value.toString().trim().isEmpty() }

    // Resolve the source columns once instead of once per row
    def names = df.getColumn("name")
    def launchDates = df.getColumn("Launch Date")
    def lastTimeBuys = df.getColumn("Last Time Buy")

    // Collect through an appender; addRow() would copy the frame on every row
    def yearColumn = DataFrame.byArrayRow("Launch Year/Last Time Buy").appender()
    for (int i = 0; i < df.height(); i++) {
        def name = names.get(i)
        def launchDate = launchDates.get(i)
        def lastTimeBuy = lastTimeBuys.get(i)

        // Fall back whenever the launch date gives no year, not only when the cell is empty
        Integer uniformYear = extractYear(launchDate)
        if (uniformYear == null) {
            uniformYear = extractYear(lastTimeBuy)
        }
        // Explicit array: a lone null argument would otherwise be taken as a null varargs array
        yearColumn.append([uniformYear] as Object[])

        if (uniformYear == null && (hasText(launchDate) || hasText(lastTimeBuy))) {
            println "Warning: ${name} -> No valid uniform year found! (Launch Date: $launchDate, Last Time Buy: $lastTimeBuy, Uniform Year: $uniformYear)"
        }
    }

    return df.hConcat(yearColumn.toDataFrame())
}
236+
237+
238+
0 commit comments