Skip to content

Commit c7d3226

Browse files
committed
Update build scripts
1 parent a66cec8 commit c7d3226

9 files changed

Lines changed: 149 additions & 120 deletions

File tree

build/csvtables.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
from pathlib import Path
22

3-
from utils import (filter_row, filter_rows, read_definitions, write_csv)
3+
from utils import filter_row, filter_rows, read_definitions, write_csv, setup_logs
44

5+
setup_logs()
56

67
def main():
78
definitions = read_definitions()
@@ -12,9 +13,14 @@ def main():
1213

1314
for simulation_round in simulation_rounds:
1415
for sector in sectors:
15-
output_path = Path('output').joinpath('csv') \
16-
.joinpath(simulation_round).joinpath('OutputData').joinpath(sector) \
17-
.with_suffix('.csv')
16+
output_path = (
17+
Path('output')
18+
.joinpath('csv')
19+
.joinpath(simulation_round)
20+
.joinpath('OutputData')
21+
.joinpath(sector)
22+
.with_suffix('.csv')
23+
)
1824

1925
variable_definitions = []
2026
variable_fieldnames = ['group', 'specifier', 'long_name', 'units', 'resolution', 'frequency',

build/definitions.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
from pathlib import Path
22

3-
from utils import (filter_row, filter_rows, get_commit_hash, read_definitions,
4-
write_json)
3+
from utils import filter_row, filter_rows, get_commit_hash, read_definitions, write_json, setup_logs
54

5+
setup_logs()
66

77
def main():
88
definitions = read_definitions()

build/glossary.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
from pathlib import Path
22

3-
from utils import get_commit_hash, read_definitions, write_json
3+
from utils import get_commit_hash, read_definitions, setup_logs, write_json
44

5+
setup_logs()
56

67
def main():
78
glossary = {

build/pattern.py

Lines changed: 38 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,53 +1,47 @@
1-
import os
21
import re
32
from pathlib import Path
43

5-
from utils import get_commit_hash, write_json, read_yaml_file
4+
from utils import get_commit_hash, write_json, read_yaml_file, setup_logs
65

6+
setup_logs()
77

88
def main():
9-
for root, dirs, files in os.walk('pattern'):
10-
for file_name in files:
11-
pattern_path = Path(root) / file_name
12-
output_path = (Path('output') / pattern_path).with_suffix('.json')
13-
14-
# create a pattern json from scratch
15-
pattern_json = {
16-
'commit': get_commit_hash()
17-
}
18-
19-
# step 2: open and read pattern
20-
pattern = read_yaml_file(pattern_path)
21-
22-
suffix = pattern.get('suffix') or ['.nc']
23-
24-
path_pattern = pattern.get('path', '''
25-
(?P<simulation_round>[A-Za-z0-9]+)
26-
/(?P<product>[A-Za-z]+)
27-
/(?P<sector>[a-z0-9-_]+)
28-
/(?P<model>[A-Za-z0-9-+._]+)
29-
/(?P<climate_forcing>[a-z0-9-]+)
30-
/(?P<period>[a-z0-9-_]+)
31-
''') + '$'
32-
dataset_pattern = '^' + pattern['dataset']
33-
file_pattern = '^' + pattern['file'] + '(' + '|'.join(suffix) + ')$'
34-
35-
# remove whitespaces
36-
path_pattern = re.sub(r'\s+', '', path_pattern)
37-
dataset_pattern = re.sub(r'\s+', '', dataset_pattern)
38-
file_pattern = re.sub(r'\s+', '', file_pattern)
39-
40-
pattern_json = {
41-
'path': path_pattern,
42-
'dataset': dataset_pattern,
43-
'file': file_pattern,
44-
'suffix': suffix,
45-
'specifiers': pattern.get('specifiers', {}),
46-
'specifiers_map': pattern.get('specifiers_map', {})
47-
}
48-
49-
# step 3: write json file
50-
write_json(output_path, pattern_json)
9+
for pattern_path in Path('pattern').rglob('**/*.yaml'):
10+
output_path = (Path('output') / pattern_path).with_suffix('.json')
11+
12+
# open and read pattern
13+
pattern = read_yaml_file(pattern_path)
14+
15+
suffix = pattern.get('suffix') or ['.nc']
16+
17+
path_pattern = pattern.get('path', '''
18+
(?P<simulation_round>[A-Za-z0-9]+)
19+
/(?P<product>[A-Za-z]+)
20+
/(?P<sector>[a-z0-9-_]+)
21+
/(?P<model>[A-Za-z0-9-+._]+)
22+
/(?P<climate_forcing>[a-z0-9-]+)
23+
/(?P<period>[a-z0-9-_]+)
24+
''') + '$'
25+
dataset_pattern = '^' + pattern['dataset']
26+
file_pattern = '^' + pattern['file'] + '(' + '|'.join(suffix) + ')$'
27+
28+
# remove whitespaces
29+
path_pattern = re.sub(r'\s+', '', path_pattern)
30+
dataset_pattern = re.sub(r'\s+', '', dataset_pattern)
31+
file_pattern = re.sub(r'\s+', '', file_pattern)
32+
33+
pattern_dict = {
34+
'commit': get_commit_hash(),
35+
'path': path_pattern,
36+
'dataset': dataset_pattern,
37+
'file': file_pattern,
38+
'suffix': suffix,
39+
'specifiers': pattern.get('specifiers', {}),
40+
'specifiers_map': pattern.get('specifiers_map', {})
41+
}
42+
43+
# write pattern as json
44+
write_json(output_path, pattern_dict)
5145

5246

5347
if __name__ == "__main__":

build/protocol.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,16 @@
11
import json
2+
import logging
23
from pathlib import Path
34

45
from jinja2 import Environment, FileSystemLoader
56
from markdown import markdown
67
from markdown.extensions.toc import TocExtension
7-
from utils import get_commit_date, get_commit_hash, read_definitions, read_patterns
8+
from utils import get_commit_date, get_commit_hash, read_definitions, read_patterns, setup_logs
89

910
from customblocks import CustomBlocksExtension
1011
from customblocks.utils import E, Markdown
1112

13+
setup_logs()
1214

1315
URL = 'https://github.com/ISI-MIP/isimip-protocol-3'
1416

@@ -86,11 +88,17 @@ def main():
8688
output_path.parent.mkdir(parents=True, exist_ok=True)
8789
environment = Environment(loader=FileSystemLoader(['templates']))
8890

91+
logging.debug('read %s', template_path)
8992
with open(template_path, encoding='utf-8') as f:
9093
template = environment.from_string(f.read())
9194

95+
logging.info('write %s', output_path)
9296
with open(output_path, 'w', encoding='utf-8') as f:
93-
f.write(template.render(html=html, definitions=json.dumps(definitions, indent=2), patterns=json.dumps(patterns, indent=2)))
97+
f.write(template.render(
98+
html=html,
99+
definitions=json.dumps(definitions, indent=2),
100+
patterns=json.dumps(patterns, indent=2)
101+
))
94102

95103

96104
if __name__ == "__main__":

build/schema.py

Lines changed: 46 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,61 +1,59 @@
1-
import json
2-
import os
31
from pathlib import Path
42

5-
from utils import filter_rows, get_commit_hash, read_definitions, read_yaml_file, write_json
3+
from utils import filter_rows, get_commit_hash, read_definitions, read_yaml_file, setup_logs, write_json
64

75
URL = 'https://protocol.isimip.org/schema/'
86
EXCLUDE = ['model']
97

8+
setup_logs()
109

1110
def main():
1211
definitions = read_definitions()
1312

14-
for root, dirs, files in os.walk('schema'):
15-
for file_name in files:
16-
schema_path = Path(root) / file_name
17-
schema_path_components = schema_path.with_suffix('').parts
18-
output_path = (Path('output') / schema_path).with_suffix('.json')
19-
20-
simulation_round = schema_path_components[1]
21-
product = schema_path_components[2]
22-
if product.endswith('InputData'):
23-
category = schema_path_components[3]
24-
sector = None
25-
else:
26-
category = None
27-
sector = schema_path_components[3]
28-
29-
# step 1: read schema template
30-
schema_template = read_yaml_file(schema_path)
31-
32-
schema = {
33-
'$schema': 'http://json-schema.org/draft-07/schema#',
34-
'$id': URL + schema_path.as_posix(),
35-
'commit': get_commit_hash()
36-
}
37-
schema.update(schema_template)
38-
39-
# step 2: loop over properties/specifiers/properties and add enums from definition files
40-
for identifier, properties in schema['properties']['specifiers']['properties'].items():
41-
if identifier in definitions:
42-
if identifier not in EXCLUDE:
43-
rows = definitions[identifier]
44-
enum = []
45-
if product.endswith('InputData'):
46-
for row in filter_rows(rows, simulation_round, product, category=category):
47-
enum.append(row.get('specifier_file') or row.get('specifier'))
48-
elif product == 'DerivedOutputData':
49-
for row in filter_rows(rows, simulation_round, product):
50-
enum.append(row.get('specifier_file') or row.get('specifier'))
51-
else:
52-
for row in filter_rows(rows, simulation_round, product, sector=sector):
53-
enum.append(row.get('specifier_file') or row.get('specifier'))
54-
55-
properties['enum'] = list(set(enum))
56-
57-
# step 3: write json schema
58-
write_json(output_path, schema)
13+
for schema_path in Path('schema').rglob('**/*.yaml'):
14+
schema_path_components = schema_path.with_suffix('').parts
15+
output_path = (Path('output') / schema_path).with_suffix('.json')
16+
17+
simulation_round = schema_path_components[1]
18+
product = schema_path_components[2]
19+
if product.endswith('InputData'):
20+
category = schema_path_components[3]
21+
sector = None
22+
else:
23+
category = None
24+
sector = schema_path_components[3]
25+
26+
# read schema template
27+
schema_template = read_yaml_file(schema_path)
28+
29+
# create schema dict
30+
schema = {
31+
'$schema': 'http://json-schema.org/draft-07/schema#',
32+
'$id': URL + schema_path.as_posix(),
33+
'commit': get_commit_hash()
34+
}
35+
schema.update(schema_template)
36+
37+
# loop over properties/specifiers/properties and add enums from definition files
38+
for identifier, properties in schema['properties']['specifiers']['properties'].items():
39+
if identifier in definitions:
40+
if identifier not in EXCLUDE:
41+
rows = definitions[identifier]
42+
enum = []
43+
if product.endswith('InputData'):
44+
for row in filter_rows(rows, simulation_round, product, category=category):
45+
enum.append(row.get('specifier_file') or row.get('specifier'))
46+
elif product == 'DerivedOutputData':
47+
for row in filter_rows(rows, simulation_round, product):
48+
enum.append(row.get('specifier_file') or row.get('specifier'))
49+
else:
50+
for row in filter_rows(rows, simulation_round, product, sector=sector):
51+
enum.append(row.get('specifier_file') or row.get('specifier'))
52+
53+
properties['enum'] = list(set(enum))
54+
55+
# write json schema
56+
write_json(output_path, schema)
5957

6058

6159
if __name__ == "__main__":

build/tree.py

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,28 @@
11
import json
2-
import os
2+
import logging
33
from pathlib import Path
44

5-
from utils import get_commit_hash, write_json
5+
from utils import get_commit_hash, setup_logs, write_json
66

7+
setup_logs()
78

89
def main():
9-
for root, dirs, files in os.walk('tree'):
10-
for file_name in files:
11-
tree_path = Path(root) / file_name
12-
output_path = Path('output') / tree_path
13-
14-
# step 2: open and read pattern
15-
with open(tree_path, encoding='utf-8') as f:
16-
identifiers = json.loads(f.read())
17-
18-
# create a pattern json from scratch
19-
tree_json = {
20-
'commit': get_commit_hash(),
21-
'identifiers': [identifier.replace(' ', '') for identifier in identifiers]
22-
}
23-
24-
# step 3: write json file
25-
write_json(output_path, tree_json)
10+
for tree_path in Path('tree').rglob('**/*.json'):
11+
output_path = Path('output') / tree_path
12+
13+
# open and read tree
14+
logging.debug('read %s', tree_path)
15+
with open(tree_path, encoding='utf-8') as f:
16+
identifiers = json.loads(f.read())
17+
18+
# create tree dict
19+
tree = {
20+
'commit': get_commit_hash(),
21+
'identifiers': [identifier.replace(' ', '') for identifier in identifiers]
22+
}
23+
24+
# write tree as json
25+
write_json(output_path, tree)
2626

2727

2828
if __name__ == "__main__":

build/utils.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,29 @@
11
import csv
22
import json
3+
import logging
4+
import os
35
import re
46
import subprocess
57
from datetime import datetime
68
from pathlib import Path
79

810
import yaml
11+
from rich.logging import RichHandler
12+
13+
14+
def setup_logs():
15+
logging.basicConfig(
16+
level=os.getenv('ISIMIP_LOG_LEVEL', 'INFO'),
17+
format="%(message)s",
18+
datefmt="[%X]",
19+
handlers=[
20+
RichHandler(
21+
show_time=os.getenv('ISIMIP_SHOW_TIME', False),
22+
show_path=os.getenv('ISIMIP_SHOW_PATH', False)
23+
)
24+
]
25+
)
26+
927

1028
def get_commit_hash():
1129
return subprocess.check_output(['git', 'rev-parse', 'HEAD']).decode().strip()
@@ -57,6 +75,7 @@ def filter_row(row, simulation_round, product, category=None, sector=None):
5775

5876

5977
def read_yaml_file(file_path):
78+
logging.debug('read %s', file_path)
6079
try:
6180
return yaml.load(file_path.read_text(encoding='utf-8'), Loader=yaml.CSafeLoader)
6281
except AttributeError:
@@ -96,12 +115,14 @@ def read_patterns(simulation_rounds, sectors):
96115

97116

98117
def write_json(output_path, output):
118+
logging.info('write %s', output_path)
99119
output_path.parent.mkdir(parents=True, exist_ok=True)
100120
with open(output_path, 'w', encoding='utf-8') as fp:
101121
fp.write(json.dumps(output, indent=2))
102122

103123

104124
def write_csv(output_path, output, fieldnames):
125+
logging.info('write %s', output_path)
105126
output_path.parent.mkdir(parents=True, exist_ok=True)
106127
with open(output_path, 'w', encoding='utf-8', newline='') as fp:
107128
writer = csv.DictWriter(fp, fieldnames=fieldnames, extrasaction='ignore')

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@ markdown-customblocks
66
pre-commit
77
PyYAML
88
typos
9+
rich

0 commit comments

Comments
 (0)