1+ """
2+ mk_exf_conditions.py
3+ ====================
4+ Convert ERA5 NetCDF atmospheric forcing into big-endian float32 binary files
5+ on the **model curvilinear grid**.
6+
7+ All fields are bilinearly interpolated from the ERA5 0.25° regular grid to
8+ model cell-centre positions. Wind components (uwind, vwind) are additionally
9+ rotated from geographic (east, north) to model-grid (i, j) directions.
10+
11+ Because the output is already on the model grid, EXF interpolation must be
12+ disabled in data.exf (remove *_nlon / *_nlat / *_lon0 / *_lat0 / *_lon_inc /
13+ *_lat_inc entries from EXF_NML_04 for every field) and
14+ rotateStressOnAgrid = .FALSE.
15+ """
16+
117import os
218import sys
319from spectre_utils import common
723from datetime import datetime
824import numpy as np
925import xarray as xr
26+ from scipy .interpolate import RegularGridInterpolator
1027
11- # Number of time steps to load and write at once.
12- # ERA5 is hourly, so 744 ≈ one month. Tune down if memory is still tight.
1328TIME_CHUNK = 744
1429
30+ # Wind variable names that form a vector pair requiring rotation.
31+ WIND_VARS = {"uwind" , "vwind" }
32+
33+
34+ # ---------------------------------------------------------------------------
35+ # Model grid helpers
36+ # ---------------------------------------------------------------------------
37+
38+ def read_model_grid (horizgridfile , Nx , Ny ):
39+ """Read cell-centre lon/lat from the MITgcm curvilinear horizgridfile."""
40+ arr = np .fromfile (horizgridfile , dtype = ">f8" )
41+ fields = arr .reshape (16 , Ny + 1 , Nx + 1 )
42+ xC = fields [0 , :Ny , :Nx ]
43+ yC = fields [1 , :Ny , :Nx ]
44+ return xC , yC
45+
46+
47+ def compute_rotation_angles (xC , yC ):
48+ """Return (angleCS, angleSN) from the i-direction of the curvilinear grid."""
49+ dlon = np .empty_like (xC )
50+ dlat = np .empty_like (yC )
51+ dlon [:, 1 :- 1 ] = xC [:, 2 :] - xC [:, :- 2 ]
52+ dlat [:, 1 :- 1 ] = yC [:, 2 :] - yC [:, :- 2 ]
53+ dlon [:, 0 ] = xC [:, 1 ] - xC [:, 0 ]
54+ dlat [:, 0 ] = yC [:, 1 ] - yC [:, 0 ]
55+ dlon [:, - 1 ] = xC [:, - 1 ] - xC [:, - 2 ]
56+ dlat [:, - 1 ] = yC [:, - 1 ] - yC [:, - 2 ]
57+ angle = np .arctan2 (dlat , dlon * np .cos (np .deg2rad (yC )))
58+ return np .cos (angle ), np .sin (angle )
59+
60+
61+ # ---------------------------------------------------------------------------
62+ # ERA5 I/O
63+ # ---------------------------------------------------------------------------
1564
1665def _open_var (working_directory , prefix , mitgcm_name , years , t1 , t2 ):
1766 """Open a single ERA5 variable across all years as a lazily-chunked dataset."""
@@ -26,26 +75,87 @@ def _open_var(working_directory, prefix, mitgcm_name, years, t1, t2):
2675 data_vars = list (ds .data_vars )
2776 if len (data_vars ) == 1 and data_vars [0 ] != mitgcm_name :
2877 ds = ds .rename ({data_vars [0 ]: mitgcm_name })
29- # ERA5 latitude is stored north-to-south (60→20 N). Flip to south-to-north
30- # so the binary layout matches data.exf: lat0=20.0, lat_inc=+0.25 (j=0=20N).
78+ # ERA5 latitude is stored north-to-south (60→20 N). Flip to south-to-north.
3179 ds = ds .isel (latitude = slice (None , None , - 1 ))
3280 return ds
3381
3482
35- def _write_chunked (da , output_path ):
36- """Write a DataArray to a big-endian float32 binary file in time chunks."""
37- n_times = da .sizes ["valid_time" ]
83+ # ---------------------------------------------------------------------------
84+ # Interpolation
85+ # ---------------------------------------------------------------------------
86+
87+ def interp_scalar_chunk (era5_lat , era5_lon , data_2d , pts , Ny , Nx ):
88+ """Bilinear interpolation of a single 2-D ERA5 field to model grid points."""
89+ interp = RegularGridInterpolator (
90+ (era5_lat , era5_lon ), data_2d , method = "linear" ,
91+ bounds_error = False , fill_value = None ,
92+ )
93+ return interp (pts ).reshape (Ny , Nx ).astype (np .float32 )
94+
95+
96+ def interp_and_rotate_wind_chunk (era5_lat , era5_lon , u_2d , v_2d ,
97+ pts , Ny , Nx , angleCS , angleSN ):
98+ """Interpolate u/v and rotate from geographic to model-grid axes."""
99+ iu = RegularGridInterpolator (
100+ (era5_lat , era5_lon ), u_2d , method = "linear" ,
101+ bounds_error = False , fill_value = None ,
102+ )
103+ iv = RegularGridInterpolator (
104+ (era5_lat , era5_lon ), v_2d , method = "linear" ,
105+ bounds_error = False , fill_value = None ,
106+ )
107+ u_g = iu (pts ).reshape (Ny , Nx )
108+ v_g = iv (pts ).reshape (Ny , Nx )
109+ u_m = (u_g * angleCS + v_g * angleSN ).astype (np .float32 )
110+ v_m = (- u_g * angleSN + v_g * angleCS ).astype (np .float32 )
111+ return u_m , v_m
112+
113+
114+ # ---------------------------------------------------------------------------
115+ # Writers
116+ # ---------------------------------------------------------------------------
117+
118+ def write_scalar_on_model_grid (ds , varname , output_path ,
119+ era5_lat , era5_lon , pts , Ny , Nx ,
120+ scale_factor = None ):
121+ n_times = ds .sizes ["valid_time" ]
38122 with open (output_path , "wb" ) as f :
39123 for i in range (0 , n_times , TIME_CHUNK ):
40- chunk = da .isel (valid_time = slice (i , i + TIME_CHUNK )).values
41- chunk .astype (">f4" ).tofile (f )
124+ chunk = ds [varname ].isel (valid_time = slice (i , i + TIME_CHUNK )).values
125+ if scale_factor is not None :
126+ chunk = chunk * scale_factor
127+ for k in range (chunk .shape [0 ]):
128+ out = interp_scalar_chunk (era5_lat , era5_lon , chunk [k ], pts , Ny , Nx )
129+ out .astype (">f4" ).tofile (f )
130+ pct = 100.0 * min (i + chunk .shape [0 ], n_times ) / n_times
131+ print (f" { pct :.0f} %" )
42132
43133
44- def main ():
134+ def write_wind_on_model_grid (ds_u , ds_v , out_u_path , out_v_path ,
135+ era5_lat , era5_lon , pts , Ny , Nx ,
136+ angleCS , angleSN ):
137+ n_times = ds_u .sizes ["valid_time" ]
138+ with open (out_u_path , "wb" ) as fu , open (out_v_path , "wb" ) as fv :
139+ for i in range (0 , n_times , TIME_CHUNK ):
140+ u_chunk = ds_u ["uwind" ].isel (valid_time = slice (i , i + TIME_CHUNK )).values
141+ v_chunk = ds_v ["vwind" ].isel (valid_time = slice (i , i + TIME_CHUNK )).values
142+ for k in range (u_chunk .shape [0 ]):
143+ u_m , v_m = interp_and_rotate_wind_chunk (
144+ era5_lat , era5_lon , u_chunk [k ], v_chunk [k ],
145+ pts , Ny , Nx , angleCS , angleSN ,
146+ )
147+ u_m .astype (">f4" ).tofile (fu )
148+ v_m .astype (">f4" ).tofile (fv )
149+ pct = 100.0 * min (i + u_chunk .shape [0 ], n_times ) / n_times
150+ print (f" { pct :.0f} %" )
45151
46- args = common .cli ()
47152
48- # Load configuration from YAML file
153+ # ---------------------------------------------------------------------------
154+ # Main
155+ # ---------------------------------------------------------------------------
156+
157+ def main ():
158+ args = common .cli ()
49159 with open (args .config_file , "r" ) as f :
50160 config = yaml .safe_load (f )
51161
@@ -60,34 +170,62 @@ def main():
60170 t1 = datetime .strptime (config ["domain" ]["time" ]["start" ], "%Y-%m-%d" )
61171 t2 = datetime .strptime (config ["domain" ]["time" ]["end" ], "%Y-%m-%d" )
62172
63- # Process and write each variable one at a time to limit peak memory usage.
64- # Each variable's dataset is opened, written in TIME_CHUNK slices, then closed
65- # before the next variable is loaded.
173+ # --- Model grid ---
174+ npx = config ["domain" ]["mpi" ]["npx" ]
175+ npy = config ["domain" ]["mpi" ]["npy" ]
176+ Nx = 96 * npx # sNx * nPx
177+ Ny = 53 * npy # sNy * nPy
178+ horizgridfile = os .path .join (simulation_input_dir , "horizgridfile.bin" )
179+
180+ print ("Reading model grid..." )
181+ xC , yC = read_model_grid (horizgridfile , Nx , Ny )
182+ angleCS , angleSN = compute_rotation_angles (xC , yC )
183+ print (f" Grid: { Ny } x{ Nx } , lon [{ xC .min ():.1f} ,{ xC .max ():.1f} ], lat [{ yC .min ():.1f} ,{ yC .max ():.1f} ]" )
184+ print (f" Rotation: angle [{ np .rad2deg (np .arctan2 (angleSN , angleCS )).min ():.2f} , "
185+ f"{ np .rad2deg (np .arctan2 (angleSN , angleCS )).max ():.2f} ] deg" )
186+
187+ # ERA5 grid (after latitude flip: south-to-north)
188+ era5_lat = np .linspace (20.0 , 60.0 , 161 )
189+ era5_lon = np .linspace (- 90.0 , - 10.0 , 321 )
190+ pts = np .column_stack ([yC .ravel (), xC .ravel ()])
191+
192+ # --- Process scalar variables ---
66193 written = set ()
67194 for var in atm_vars :
68195 mitgcm_name = var ["mitgcm_name" ]
69- if mitgcm_name in written :
196+ if mitgcm_name in written or mitgcm_name in WIND_VARS :
70197 continue
71198 written .add (mitgcm_name )
72199
73200 print (f"Processing { mitgcm_name } ..." )
74201 scale_factor = var .get ("scale_factor" )
75-
76202 ds = _open_var (working_directory , prefix , mitgcm_name , years , t1 , t2 )
77- da = ds [mitgcm_name ]
78- if scale_factor is not None :
79- da = da * scale_factor
80-
81203 output_path = os .path .join (simulation_input_dir , f"{ mitgcm_name } .bin" )
82- _write_chunked (da , output_path )
204+ write_scalar_on_model_grid (
205+ ds , mitgcm_name , output_path ,
206+ era5_lat , era5_lon , pts , Ny , Nx ,
207+ scale_factor = scale_factor ,
208+ )
83209 ds .close ()
84210
85- # Compute derived variables (e.g. specific humidity from dewpoint + surface pressure).
86- # Only the two inputs are loaded at a time, also written in chunks.
211+ # --- Process wind vector (interpolate + rotate) ---
212+ print ("Processing uwind + vwind (vector interpolation + rotation)..." )
213+ ds_u = _open_var (working_directory , prefix , "uwind" , years , t1 , t2 )
214+ ds_v = _open_var (working_directory , prefix , "vwind" , years , t1 , t2 )
215+ write_wind_on_model_grid (
216+ ds_u , ds_v ,
217+ os .path .join (simulation_input_dir , "uwind.bin" ),
218+ os .path .join (simulation_input_dir , "vwind.bin" ),
219+ era5_lat , era5_lon , pts , Ny , Nx ,
220+ angleCS , angleSN ,
221+ )
222+ ds_u .close ()
223+ ds_v .close ()
224+
225+ # --- Computed variables (aqh from d2m + sp) ---
87226 for cv in computed_vars :
88227 mitgcm_name = cv ["mitgcm_name" ]
89228 print (f"Computing { mitgcm_name } ..." )
90-
91229 ds_d2m = _open_var (working_directory , prefix , "d2m" , years , t1 , t2 )
92230 ds_sp = _open_var (working_directory , prefix , "sp" , years , t1 , t2 )
93231 n_times = ds_d2m .sizes ["valid_time" ]
@@ -97,14 +235,24 @@ def main():
97235 for i in range (0 , n_times , TIME_CHUNK ):
98236 d2m_k = ds_d2m ["d2m" ].isel (valid_time = slice (i , i + TIME_CHUNK )).values
99237 sp_pa = ds_sp ["sp" ].isel (valid_time = slice (i , i + TIME_CHUNK )).values
100- aqh = specific_humidity_from_dewpoint (
101- sp_pa * units .Pa , (d2m_k - 273.15 ) * units .degC
238+ aqh_era = np .array (
239+ specific_humidity_from_dewpoint (
240+ sp_pa * units .Pa , (d2m_k - 273.15 ) * units .degC
241+ )
102242 )
103- np .array (aqh ).astype (">f4" ).tofile (f )
243+ for k in range (aqh_era .shape [0 ]):
244+ out = interp_scalar_chunk (
245+ era5_lat , era5_lon , aqh_era [k ], pts , Ny , Nx ,
246+ )
247+ out .astype (">f4" ).tofile (f )
248+ pct = 100.0 * min (i + aqh_era .shape [0 ], n_times ) / n_times
249+ print (f" { pct :.0f} %" )
104250
105251 ds_d2m .close ()
106252 ds_sp .close ()
107253
254+ print ("Done — all EXF fields written on model grid." )
255+
108256
109257if __name__ == "__main__" :
110258 main ()
0 commit comments