-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathnormalize_hash_rates.py
More file actions
223 lines (181 loc) · 7.74 KB
/
normalize_hash_rates.py
File metadata and controls
223 lines (181 loc) · 7.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
#!/usr/bin/env python3
"""
Normalize Hash Rate Data
Converts all hash rate values to consistent units and outputs clean CSV
"""
import re
import csv
from datetime import datetime
# Raw history text read by main(); entries are delimited by lines of 80 '=' characters.
INPUT_FILE = "hash_rate_history.txt"
# CSV written by main(): one normalized row per successfully parsed entry.
OUTPUT_FILE = "normalized_hash_rates.csv"
def parse_hash_value(value_str):
    """
    Parse a hash rate string and convert it to base units.

    Args:
        value_str: Text such as "250 MH", "3.70 GH/s", "120 Kg" or
            "120 kgraphs". May also be None, empty, or "N/A".

    Returns:
        The rate as a float in base units (H/s for hash units, g/s for
        graph units), or None when the input is missing, is "N/A", does not
        start with "<number> <unit>", has a malformed number, or uses an
        unknown unit. A warning is printed for malformed numbers and
        unknown units.

    Examples:
        "250 MH"  -> 250000000 (250 million H/s)
        "3.70 GH" -> 3700000000 (3.7 billion H/s)
        "47.0 TH" -> 47000000000000 (47 trillion H/s)
        "120 Kg"  -> 120000 (120 thousand g/s)
    """
    if not value_str:
        return None

    # Strip before comparing so padded placeholders (" N/A ") are recognised.
    value_str = value_str.strip()
    if not value_str or value_str == 'N/A':
        return None

    # Handle special cases like "kgraphs"
    value_str = value_str.replace('kgraphs', 'Kg')

    # Extract number and unit
    match = re.match(r'([\d.]+)\s*([A-Za-z/]+)', value_str)
    if not match:
        return None

    # The pattern above also accepts strings such as "1.2.3" or "..." that
    # float() rejects; treat those as unparseable instead of crashing the run.
    try:
        number = float(match.group(1))
    except ValueError:
        print(f"Warning: Invalid number '{match.group(1)}' in value '{value_str}'")
        return None
    unit = match.group(2)

    # Multipliers for hash units (H, MH, GH, TH, etc.)
    hash_multipliers = {
        'H': 1,
        'kH': 1e3,
        'KH': 1e3,
        'MH': 1e6,
        'GH': 1e9,
        'TH': 1e12,
        'PH': 1e15,
        'EH': 1e18,
    }

    # Multipliers for graph units (g, Kg, Mg, etc.)
    graph_multipliers = {
        'g': 1,
        'kg': 1e3,
        'Kg': 1e3,
        'KG': 1e3,
        'Mg': 1e6,
        'MG': 1e6,
        'Gg': 1e9,
        'GG': 1e9,
    }

    # Remove /s suffix if present
    unit_clean = unit.replace('/s', '')

    # Hash units are tried first, then graph units; the key sets are disjoint.
    if unit_clean in hash_multipliers:
        return number * hash_multipliers[unit_clean]
    if unit_clean in graph_multipliers:
        return number * graph_multipliers[unit_clean]

    print(f"Warning: Unknown unit '{unit}' in value '{value_str}'")
    return None
def convert_to_display_unit(value, unit_type='hash'):
    """
    Scale a base-unit value into the unit used for display.

    unit_type selects the target unit:
        'hash'  -> value divided by 1e12, labelled 'TH'
        'graph' -> value divided by 1e3, labelled 'Kg'
        other   -> value returned unscaled with an empty label

    Returns: tuple (value, unit_string); (None, None) when value is None.
    """
    if value is None:
        return None, None

    if unit_type == 'graph':
        # Kilographs per second
        scaled, label = value / 1e3, 'Kg'
    elif unit_type == 'hash':
        # Terahashes per second
        scaled, label = value / 1e12, 'TH'
    else:
        # Unrecognised unit type: pass the value through untouched.
        scaled, label = value, ''
    return scaled, label
def parse_entry(text):
    """
    Parse a single hash rate entry and extract all fields.

    Args:
        text: The text of one entry from the history file.

    Returns:
        None when the entry has no "Date: YYYY-MM-DD HH:MM:SS" stamp.
        Otherwise a dict with keys:
            'date'         - the date string as found in the entry
            'block_height' - digits of the block height (commas removed),
                             or None if absent
            'net_sha3_th'  - SHA3/SHA3x rate in TH/s, rounded to 2 places
            'net_rxt_gh'   - RandomX (Tari) rate in GH/s, rounded to 2 places
            'net_rxm_gh'   - RandomX (Merged-Mined XMR) rate in GH/s,
                             rounded to 2 places
            'net_c29_kg'   - Cuckaroo 29 rate in Kg/s, rounded to 2 places
        A rate is None when its label is missing or its value could not be
        parsed. A reading of zero is reported as 0.0, not None.
    """
    def _rounded(display_value, scale=1):
        # Explicit None check: a truthiness test would drop a genuine
        # reading of 0 and report it as "missing".
        if display_value is None:
            return None
        return round(display_value * scale, 2)

    # Extract date
    date_match = re.search(r'Date:\s*(\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2})', text)
    if not date_match:
        return None
    date_str = date_match.group(1)

    # Extract block height
    block_match = re.search(r'Block Height:\s*([\d,]+)', text)
    block_height = block_match.group(1).replace(',', '') if block_match else None

    # Extract SHA3x or SHA3 Hash Rate
    sha_match = re.search(r'SHA3(?:x)?(?: Hash Rate)?:\s*([\d.]+\s*[A-Za-z/]+)', text)
    sha_value = parse_hash_value(sha_match.group(1)) if sha_match else None

    # Extract RandomX (Tari) - labelled "RandomX (Tari)" or "RandomX (Tari) Hash Rate"
    rxt_match = re.search(r'RandomX \(Tari\)(?: Hash Rate)?:\s*([\d.]+\s*[A-Za-z/]+)', text)
    rxt_value = parse_hash_value(rxt_match.group(1)) if rxt_match else None

    # Extract RandomX (Merged-Mined XMR) - label may or may not end in "Hash Rate"
    rxm_match = re.search(r'RandomX \(Merged-Mined XMR\)(?: Hash Rate)?:\s*([\d.]+\s*[A-Za-z/]+)', text)
    rxm_value = parse_hash_value(rxm_match.group(1)) if rxm_match else None

    # Early format (May 2025): just "RandomX Hash Rate" without Tari/XMR distinction.
    # Initially only SHA3 and RXM (merged-mined XMR) existed, RXT (Tari) came later.
    if not rxt_match and not rxm_match:
        early_rx_match = re.search(r'RandomX Hash Rate:\s*([\d.]+\s*[A-Za-z/]+)', text)
        if early_rx_match:
            # In early format, this was the merged-mined XMR RandomX value
            rxm_value = parse_hash_value(early_rx_match.group(1))
            rxt_value = None  # Tari RandomX wasn't tracked separately yet

    # Extract Cuckaroo 29
    c29_match = re.search(r'Cuckaroo 29:\s*([\d.]+\s*[A-Za-z/]+)', text)
    c29_value = parse_hash_value(c29_match.group(1)) if c29_match else None

    # Convert to display units; convert_to_display_unit maps None to (None, None),
    # so no pre-check is needed here.
    rxt_display, _ = convert_to_display_unit(rxt_value, 'hash')
    rxm_display, _ = convert_to_display_unit(rxm_value, 'hash')
    sha_display, _ = convert_to_display_unit(sha_value, 'hash')
    c29_display, _ = convert_to_display_unit(c29_value, 'graph')

    return {
        'date': date_str,
        'block_height': block_height,
        'net_sha3_th': _rounded(sha_display),
        'net_rxt_gh': _rounded(rxt_display, 1000),  # TH to GH
        'net_rxm_gh': _rounded(rxm_display, 1000),  # TH to GH
        'net_c29_kg': _rounded(c29_display),
    }
def main():
    """
    Read the raw history file, normalize every entry, and write the CSV.

    Steps:
        1. Read INPUT_FILE and split it on lines of 80 '=' characters.
        2. Parse each chunk that mentions both 'Date:' and 'Block Height:'.
        3. Sort the parsed rows by their date string.
        4. Write OUTPUT_FILE with a unit-annotated header row.
        5. Print a column legend plus the first and last rows as a sample.

    Errors from opening or reading the files are not caught here and
    propagate to the caller.
    """
    print("Reading hash rate history...")
    with open(INPUT_FILE, 'r', encoding='utf-8') as f:
        content = f.read()

    # Split by the separator lines
    parsed_data = []
    for entry in content.split('=' * 80):
        if 'Date:' in entry and 'Block Height:' in entry:
            parsed = parse_entry(entry)
            if parsed:
                parsed_data.append(parsed)
    print(f"Parsed {len(parsed_data)} entries")

    # Sort by date
    parsed_data.sort(key=lambda x: x['date'])

    fieldnames = ['date', 'block_height', 'net_sha3_th', 'net_rxt_gh', 'net_rxm_gh', 'net_c29_kg']
    header_labels = [
        'Date',
        'Block Height',
        'net_sha3 (TH/s)',
        'net_rxt (GH/s)',
        'net_rxm (GH/s)',
        'net_c29 (Kg/s)',
    ]

    # Write to CSV
    with open(OUTPUT_FILE, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        # Emit the unit-annotated header through the writer so it gets the
        # same line terminator as the data rows (a raw f.write with '\n'
        # would leave the file with mixed line endings).
        writer.writerow(dict(zip(fieldnames, header_labels)))
        for row in parsed_data:
            # Format None values as empty strings
            writer.writerow(
                {key: '' if row[key] is None else row[key] for key in fieldnames}
            )

    print(f"\n✅ Normalized data written to: {OUTPUT_FILE}")
    print("\nColumn headers:")
    print(" Date - Timestamp of measurement")
    print(" Block Height - Blockchain block number")
    print(" net_sha3 (TH/s) - SHA3x hash rate in terahashes per second")
    print(" net_rxt (GH/s) - RandomX (Tari) hash rate in gigahashes per second")
    print(" net_rxm (GH/s) - RandomX (Merged-Mined XMR) in gigahashes per second")
    print(" net_c29 (Kg/s) - Cuckaroo 29 in kilographs per second")

    # Read the result back once, using the encoding it was written with.
    with open(OUTPUT_FILE, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    # Show first few rows as sample
    print("\nFirst 5 rows:")
    print("-" * 100)
    for line in lines[:6]:  # Header + 5 rows
        print(line.rstrip())

    print("\nLast 5 rows:")
    print("-" * 100)
    for line in lines[-5:]:
        print(line.rstrip())
# Run the normalization only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()