|
| 1 | +import xarray |
| 2 | +import numpy as np |
| 3 | +import pandas as pd |
| 4 | + |
| 5 | + |
| 6 | +def overwrite_time(ds, starts, ends): |
| 7 | + """ |
| 8 | + For a monthly dataset containing time and time_bnds, |
| 9 | + given an array starts containing len(ds.time) first days of the month |
| 10 | + and another matching array containing ends of the month/first days of the next month, |
| 11 | + overwrite time to be in the middle of the month, |
| 12 | + and use the starts and ends as the time bounds. |
| 13 | + """ |
| 14 | + delta = ends - starts |
| 15 | + middles = starts + (delta / 2) |
| 16 | + ds['time'] = middles |
| 17 | + ds['time_bnds'] = (('time', 'bnds'), np.column_stack((starts, ends))) |
| 18 | + return ds |
| 19 | + |
| 20 | + |
| 21 | +def main(): |
| 22 | + # Historical forcing: |
| 23 | + hist = xarray.open_dataset( |
| 24 | + '/work/acr/input4mips/mole-fraction-of-carbon-dioxide-in-air_input4MIPs_GHGConcentrations_CMIP_UoM-CMIP-1-2-0_gr-0p5x360deg_000001-201412.nc', |
| 25 | + decode_times=False |
| 26 | + ) |
| 27 | + |
| 28 | + # Slice to just the last 1200 times (= 100 years). Otherwise, |
| 29 | + # the times extend to 0000-01-01, which causes problems with xarray |
| 30 | + hist = hist.isel(time=slice(-1200, None)) |
| 31 | + |
| 32 | + # Also rename the bnds variable to match the scenario dataset |
| 33 | + hist = hist.rename({'bound': 'bnds'}) |
| 34 | + |
| 35 | + # Recreate the times in the dataset. It ends in Dec 2014 and is monthly. |
| 36 | + starts = pd.date_range(end='2014-12-01', periods=len(hist['time']), freq='1MS') |
| 37 | + ends = pd.date_range(end='2015-01-01', periods=len(hist['time']), freq='1MS') |
| 38 | + hist = overwrite_time(hist, starts, ends) |
| 39 | + |
| 40 | + # Scenario (ssp245) forcing: |
| 41 | + scenario = ( |
| 42 | + xarray.open_dataset('/work/acr/input4mips/mole-fraction-of-carbon-dioxide-in-air_input4MIPs_GHGConcentrations_ScenarioMIP_UoM-MESSAGE-GLOBIOM-ssp245-1-2-1_gr-0p5x360deg_201501-250012.nc', decode_times=False) |
| 43 | + .isel(time=slice(0, 12*50)) # Select just first 50 years |
| 44 | + ) |
| 45 | + |
| 46 | + # For the scenario, we know it starts in Jan 2015 |
| 47 | + starts = pd.date_range(start='2015-01-01', periods=len(scenario['time']), freq='1MS') |
| 48 | + ends = pd.date_range(start='2015-02-01', periods=len(scenario['time']), freq='1MS') |
| 49 | + scenario = overwrite_time(scenario, starts, ends) |
| 50 | + |
| 51 | + # Bring the two together |
| 52 | + combined = xarray.merge([hist, scenario]) |
| 53 | + |
| 54 | + # Add longitude coordinates to make sure FMS can read it. The actual data get repeated across longitude. |
| 55 | + combined['mole_fraction_of_carbon_dioxide_in_air'] = combined['mole_fraction_of_carbon_dioxide_in_air'].expand_dims({'lon': 5}, axis=-1) |
| 56 | + combined['lon'] = np.arange(-60.0, 301.0, 90.0) |
| 57 | + combined['lon'].attrs['axis'] = 'X' |
| 58 | + |
| 59 | + # Set how the data will be written out |
| 60 | + encodings = {k: {'_FillValue': 1.0e20} for k in list(combined.variables.keys())} |
| 61 | + encodings['time'].update({'dtype':'float64', 'calendar': 'gregorian', 'units': 'hours since 1915-01-01'}) |
| 62 | + encodings['time_bnds'].update({'dtype':'float64', 'calendar': 'gregorian', 'units': 'hours since 1915-01-01'}) |
| 63 | + encodings['lon'].update({'dtype':'float64'}) |
| 64 | + |
| 65 | + combined.to_netcdf('mole_fraction_of_co2_extended_ssp245_v2.nc', encoding=encodings, unlimited_dims='time') |
| 66 | + |
| 67 | + |
| 68 | +if __name__ == '__main__': |
| 69 | + main() |
0 commit comments