Skip to content

Commit 0346f7f

Browse files
Read EXF binary files in review and animate utils instead of ERA5 NetCDF
Adds load_exf_binaries() to common.py which memory-maps the written .bin files and reconstructs coordinates from NetCDF metadata only. Updates review_exf_conditions.py and animate_exf_conditions.py to use this so QC checks and animations reflect the actual data that MITgcm will consume.
1 parent 8511733 commit 0346f7f

3 files changed

Lines changed: 70 additions & 31 deletions

File tree

spectre_utils/animate_exf_conditions.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,6 @@
77
import matplotlib.pyplot as plt
88
from matplotlib.animation import FFMpegWriter
99
from datetime import datetime
10-
from metpy.calc import specific_humidity_from_dewpoint
11-
from metpy.units import units
1210

1311
from spectre_utils import common
1412

@@ -106,15 +104,9 @@ def main():
106104
t1 = datetime.strptime(config["domain"]["time"]["start"], "%Y-%m-%d")
107105
t2 = datetime.strptime(config["domain"]["time"]["end"], "%Y-%m-%d")
108106

109-
print("Loading dataset...")
110-
ds = common.load_atm_dataset(working_directory, prefix, years, atm_vars, t1, t2)
111-
112-
if "sp" in ds and "d2m" in ds:
113-
d2m_celsius = ds["d2m"] - 273.15
114-
ds["aqh"] = specific_humidity_from_dewpoint(ds["sp"] * units.Pa, d2m_celsius * units.degC)
115-
116107
animations_dir = os.path.join(simulation_directory, "animations", "atmosphere")
117108
os.makedirs(animations_dir, exist_ok=True)
109+
simulation_input_dir = os.path.join(simulation_directory, "input")
118110

119111
# Collect all variable names to animate (configured + computed), deduplicated
120112
seen = set()
@@ -130,6 +122,11 @@ def main():
130122
seen.add(n)
131123
to_animate.append(n)
132124

125+
print("Loading EXF binary files...")
126+
ds = common.load_exf_binaries(
127+
simulation_input_dir, to_animate, working_directory, prefix, years, atm_vars, t1, t2
128+
)
129+
133130
for name in to_animate:
134131
if name not in ds:
135132
print(f" Skipping {name} (not in dataset)")

spectre_utils/common.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,54 @@ def from_copernicus(dataset_id, variables, start_date, end_date, min_long, max_l
6868
return ds_full, grid
6969

7070

71+
def load_exf_binaries(simulation_input_dir, var_names, working_directory, prefix, years, atm_vars, t1, t2):
    """Load EXF binary files into a labelled xarray Dataset.

    Coordinates (time, latitude, longitude) are reconstructed from the ERA5
    NetCDF source files (metadata only — no variable data are read from them).
    The binary data are memory-mapped so only the portions actually accessed
    are read into RAM.

    Parameters
    ----------
    simulation_input_dir : str
        Directory containing the written ``<name>.bin`` EXF files.
    var_names : iterable of str
        MITgcm variable names whose binaries should be loaded; names whose
        ``.bin`` file is absent are skipped with a warning on stderr.
    working_directory, prefix, years :
        Locate the per-year ERA5 NetCDF files ``{prefix}_{mitgcm_name}_{year}.nc``.
    atm_vars : list of dict
        Configured variables; each must provide a ``"mitgcm_name"`` key.
    t1, t2 : datetime
        Inclusive window applied to the ``valid_time`` coordinate.

    Returns
    -------
    xarray.Dataset
        One (valid_time, latitude, longitude) DataArray per binary found,
        backed by a read-only big-endian float32 memory map.

    Raises
    ------
    FileNotFoundError
        If no complete set of ERA5 NetCDF files exists for any variable.
    ValueError
        If a binary file's size does not match the grid implied by the
        NetCDF metadata.
    """
    import os
    import sys  # required for the stderr warning below; was missing originally

    import numpy as np
    import xarray as xr

    # Obtain grid coordinates from the first variable whose NetCDF files all
    # exist. Only coordinate metadata is read; the dataset is closed right after.
    coord_ds = None
    for var in atm_vars:
        mitgcm_name = var["mitgcm_name"]
        files = [f"{working_directory}/{prefix}_{mitgcm_name}_{year}.nc" for year in years]
        if all(os.path.exists(f) for f in files):
            coord_ds = xr.open_mfdataset(files, combine="by_coords").sel(
                valid_time=slice(t1, t2)
            )
            break
    if coord_ds is None:
        raise FileNotFoundError("No ERA5 NetCDF files found for coordinate extraction.")

    times = coord_ds["valid_time"].values
    lats = coord_ds["latitude"].values
    lons = coord_ds["longitude"].values
    coord_ds.close()

    nt, ny, nx = len(times), len(lats), len(lons)
    # MITgcm EXF binaries are big-endian float32 (4 bytes per value).
    expected_bytes = nt * ny * nx * 4

    data_vars = {}
    for name in var_names:
        bin_path = os.path.join(simulation_input_dir, f"{name}.bin")
        if not os.path.exists(bin_path):
            print(f"Warning: binary file not found, skipping: {bin_path}", file=sys.stderr)
            continue
        # Validate size up front: np.memmap's own error for a short file is
        # cryptic, and an oversized file would silently map misaligned data.
        actual_bytes = os.path.getsize(bin_path)
        if actual_bytes != expected_bytes:
            raise ValueError(
                f"{bin_path}: size {actual_bytes} bytes does not match expected "
                f"{expected_bytes} bytes for shape ({nt}, {ny}, {nx}) dtype >f4"
            )
        arr = np.memmap(bin_path, dtype=">f4", mode="r", shape=(nt, ny, nx))
        data_vars[name] = xr.DataArray(
            arr,
            dims=["valid_time", "latitude", "longitude"],
            coords={"valid_time": times, "latitude": lats, "longitude": lons},
            name=name,
        )

    return xr.Dataset(data_vars)
117+
118+
71119
def load_atm_dataset(working_directory, prefix, years, atm_vars, t1, t2):
72120
"""Load ERA5 atmospheric variables per MITgcm name, rename, and apply optional scale factors.
73121

spectre_utils/review_exf_conditions.py

Lines changed: 16 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
"""
22
QC review of EXF atmospheric forcing fields.
33
4-
Loads the ERA5 xarray dataset (same config-driven pipeline as
5-
mk_exf_conditions.py), computes derived variables, runs physical
6-
range / consistency / temporal checks, verifies binary file sizes, and
7-
writes a Markdown report plus three diagnostic figures to:
4+
Loads the written binary forcing files, runs physical range / consistency /
5+
temporal checks, verifies binary file sizes, and writes a Markdown report
6+
plus three diagnostic figures to:
87
98
<simulation_directory>/review/atmosphere/
109
report.md
@@ -23,9 +22,6 @@
2322
import matplotlib.pyplot as plt
2423
from datetime import datetime, timezone
2524

26-
from metpy.calc import specific_humidity_from_dewpoint
27-
from metpy.units import units as metpy_units
28-
2925
from spectre_utils import common
3026

3127
# ---------------------------------------------------------------------------
@@ -216,22 +212,8 @@ def main():
216212
os.makedirs(review_dir, exist_ok=True)
217213

218214
# ------------------------------------------------------------------
219-
# Load dataset
215+
# Build ordered list of all mitgcm names (configured + computed)
220216
# ------------------------------------------------------------------
221-
print("Loading ERA5 dataset...")
222-
ds = common.load_atm_dataset(working_directory, prefix, years, atm_vars, t1, t2)
223-
224-
if "sp" in ds and "d2m" in ds:
225-
d2m_celsius = ds["d2m"] - 273.15
226-
ds["aqh"] = specific_humidity_from_dewpoint(
227-
ds["sp"] * metpy_units.Pa, d2m_celsius * metpy_units.degC
228-
)
229-
230-
nt = ds.sizes["valid_time"]
231-
ny = ds.sizes.get("latitude")
232-
nx = ds.sizes.get("longitude")
233-
234-
# Ordered list of all mitgcm names (configured + computed), deduplicated
235217
seen: set[str] = set()
236218
all_names: list[str] = []
237219
for var in atm_vars:
@@ -245,6 +227,18 @@ def main():
245227
seen.add(n)
246228
all_names.append(n)
247229

230+
# ------------------------------------------------------------------
231+
# Load binary forcing files
232+
# ------------------------------------------------------------------
233+
print("Loading EXF binary files...")
234+
ds = common.load_exf_binaries(
235+
simulation_input_dir, all_names, working_directory, prefix, years, atm_vars, t1, t2
236+
)
237+
238+
nt = ds.sizes["valid_time"]
239+
ny = ds.sizes.get("latitude")
240+
nx = ds.sizes.get("longitude")
241+
248242
# ------------------------------------------------------------------
249243
# Diagnostic figures
250244
# ------------------------------------------------------------------

0 commit comments

Comments
 (0)