# WRF-HYDRO_CALIB/createCalibratedParams.py at master · bsu-wrudisill/WRF-HYDRO_CALIB · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import numpy as np
import pandas as pd
from pathlib import Path
import xarray as xr
import shutil
import sys
libPathList = ['./lib/Python', './util']
for libPath in libPathList:
	sys.path.insert(0,libPath)
from Calibration import Calibration
import accessories as acc
import os
import yaml
import argparse


####################################################################################
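"""
createCalibratedParams.py <path_to_calibration_directory> [iteration]

Description: This script makes the calibrated "domain" files based on the results from
a calibration run. To run it, you must provide the path to the .../Calibration directory,
or wherever contains the "Calibration.db" file; the script reads "setup.yaml" from that
directory. It creates a directory called "calibrated_parameters" in the current working
directory and places the files there. If the optional iteration number is omitted, the
improved iteration with the minimum objective function is used.

"""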
#####################################################################################


# Parse the command line:
#   directory -- folder that holds setup.yaml
#   iteration -- optional integer; when omitted it stays None and the
#                selection further down picks the iteration on its own
parser = argparse.ArgumentParser()
parser.add_argument("directory", type=str, help="input directory")
parser.add_argument("iteration", default=None, type=int, nargs='?', help="requested iteration")
args = parser.parse_args()


directory = Path(args.directory).resolve()
setupfile = directory.joinpath('setup.yaml')

# Build the calibration object from the setup file; calib.parmdirc and
# calib.database are read from it below.
calib = Calibration(setupfile)

# Output location, relative to the current working directory.
path_to_output_files = Path("./calibrated_parameters")
path_to_output_files.mkdir(exist_ok=True)


# Copy every file of the original parameter directory so that the output
# directory is complete; the files that hold calibrated parameters are
# rewritten later on.
for domainfile in calib.parmdirc.glob('*'):
    # glob('*') also yields sub-directories, which shutil.copy cannot handle.
    if not domainfile.is_file():
        continue
    print(domainfile)
    shutil.copy(domainfile, path_to_output_files)


def getParameters(dbcon):
    """Return the calibrated rows of the PARAMETERS table.

    Parameters
    ----------
    dbcon : str or path-like
        Location of the SQLite database file.

    Returns
    -------
    pandas.DataFrame
        All columns of PARAMETERS for the rows whose ``calib_flag`` is 1.
    """
    # Local stdlib imports: reading a SQLite file through sqlite3 avoids the
    # SQLAlchemy requirement that a "sqlite:///..." URI string brings along.
    import sqlite3
    from contextlib import closing

    param_cmd = "SELECT * FROM PARAMETERS WHERE calib_flag = 1"
    # closing() guarantees the connection is released even if the query fails.
    with closing(sqlite3.connect(str(dbcon))) as connection:
        return pd.read_sql(sql=param_cmd, con=connection)


def getPerformance(dbcon, **kwargs):
    """Return the complete CALIBRATION table.

    Only use this when there is just one iteration.

    Parameters
    ----------
    dbcon : str or path-like
        Location of the SQLite database file.
    **kwargs
        Accepted for call compatibility; not used by the query.

    Returns
    -------
    pandas.DataFrame
        Every row and column of the CALIBRATION table.
    """
    # Local stdlib imports: reading a SQLite file through sqlite3 avoids the
    # SQLAlchemy requirement that a "sqlite:///..." URI string brings along.
    import sqlite3
    from contextlib import closing

    perf_cmd = "SELECT * FROM CALIBRATION"
    # closing() guarantees the connection is released even if the query fails.
    with closing(sqlite3.connect(str(dbcon))) as connection:
        return pd.read_sql(sql=perf_cmd, con=connection)

def returnQmodOnly(dbcon, **kwargs):
    """Return the MODOUT table with parsed timestamps and a model label.

    Only use this when there is just one iteration.

    Parameters
    ----------
    dbcon : str or path-like
        Location of the SQLite database file.
    **kwargs
        Accepted for call compatibility; not used by the query.

    Returns
    -------
    pandas.DataFrame
        Every row of MODOUT; the ``time`` column is converted with
        ``pd.to_datetime`` and a constant ``type`` column holding
        ``'WRF_Hydro V5'`` is added.
    """
    # Local stdlib imports: reading a SQLite file through sqlite3 avoids the
    # SQLAlchemy requirement that a "sqlite:///..." URI string brings along.
    import sqlite3
    from contextlib import closing

    mod_cmd = "SELECT * FROM MODOUT"
    # closing() guarantees the connection is released even if the query fails.
    with closing(sqlite3.connect(str(dbcon))) as connection:
        mod = pd.read_sql(sql=mod_cmd, con=connection)
    mod['time'] = pd.to_datetime(mod['time'])
    mod['type'] = 'WRF_Hydro V5'
    return mod


# Load the calibrated parameter values and the per-iteration performance.
param = getParameters(calib.database)
# The parameter table is matched against an integer iteration below, so
# its iteration labels are cast to int up front.
param['iteration'] = param['iteration'].map(int)
performance = getPerformance(calib.database)

if args.iteration is None:
    # No iteration requested: take the row(s) holding the overall minimum
    # objective, provided they are flagged as an improvement.
    print('finding parameter with minimum objective function')
    is_minimum = performance['objective'] == performance['objective'].min()
    best_row = performance.loc[is_minimum & (performance['improvement'] == 1)]
else:
    # The performance table is compared against the iteration as text.
    print('grabbing parameter ... %s'%args.iteration)
    best_row = performance.loc[performance['iteration'] == str(args.iteration)]


initial_run = performance.loc[performance['iteration'] == '0']
print("========= Initial Run ========")
print(initial_run)

print("========== Selection =========")
print(best_row)

# int() on a Series only works for exactly one element (and is deprecated
# even then), so make the zero-row and multi-row cases explicit.
if best_row.empty:
    sys.exit('no row of the CALIBRATION table matches the selection; nothing to write')
if len(best_row) > 1:
    print('selection matched %d rows; using the first one' % len(best_row))
    best_row = best_row.iloc[[0]]

best_iteration = int(best_row['iteration'].iloc[0])

# set_index returns a new frame, which avoids mutating a slice of `param`.
best_parameters = param.loc[param['iteration'] == best_iteration].set_index('parameter')


# Read the calibration table. Fields are separated by a comma with optional
# blanks on either side, hence the regex delimiter and the python engine.
clb = pd.read_csv('calib_params.tbl', delimiter=' *, *', engine='python')
clb.set_index('parameter', inplace=True)

# The file name and the number of dimensions of each parameter live in
# calib_params.yaml; attach them to the table as two extra columns.
# Parameters that are missing from the yaml keep None in both columns.
clb["file"] = None
clb["dims"] = None

with open('calib_params.yaml') as y:
    yamlfile = yaml.load(y, Loader=yaml.FullLoader)

keys = yamlfile['parameters'].keys()

# Group the files with the table ...
# (the loop variable is deliberately not called `param`, so the module-level
# parameter DataFrame of that name is not overwritten)
for tableParam in clb.index:
    if tableParam in keys:
        yamlEntry = yamlfile['parameters'][tableParam]
        clb.at[tableParam, 'file'] = yamlEntry['file']
        clb.at[tableParam, 'dims'] = yamlEntry['dimensions']


print(clb)

# One group per NetCDF file, so that every file is opened and written once.
grouped = clb.groupby('file')
ncList = grouped.groups.keys()

# Labels of the parameters for which a calibrated value is available.
calibratedNames = best_parameters.index

# Open each file once and adjust the parameter values.
for ncSingle in ncList:
    outputFile = path_to_output_files.joinpath(ncSingle)
    # The context manager closes the source dataset even when an update fails.
    with xr.open_dataset(calib.parmdirc.joinpath(ncSingle)) as UpdateMe:
        for parName in grouped.groups[ncSingle]:
            if parName not in calibratedNames:
                continue

            # acc.AddOrMult returns the callable that combines the stored
            # values with the calibrated value -- presumably addition or
            # multiplication depending on `factor`; confirm in accessories.
            updateFun = acc.AddOrMult(clb.loc[parName].factor)
            dims = clb.loc[parName].dims
            updateVal = best_parameters.loc[parName].currentValue

            # Only 1, 2 and 3 dimensions are handled; say so instead of
            # silently leaving a calibrated parameter untouched.
            if dims not in (1, 2, 3):
                print('skipping %s: unsupported number of dimensions %s' % (parName, dims))
                continue

            # Same as [:], [:,:] or [:,:,:] for dims of 1, 2 or 3.
            wholeArray = (slice(None),) * int(dims)
            UpdateMe[parName][wholeArray] = updateFun(UpdateMe[parName][wholeArray], updateVal)

        # The plain copy made earlier cannot be overwritten in place, so it is
        # removed first -- but only now, once every update has succeeded, and
        # only for the files that actually get rewritten.
        if outputFile.exists():
            os.remove(outputFile)
        UpdateMe.to_netcdf(outputFile)
#
##output directory