Commit cb6a51e

Generate repro test errors when model does not run successfully (#173)
* Raise RuntimeErrors for failed model runs, and add more detailed logs to exceptions so they are displayed in the test output on GitHub
* Repro tests: move checking of experiment runs into the requested_experiments fixture. This means any errors from a non-zero exit status in model runs are reported as test setup errors rather than reproducibility test failures
* Add a test to check errors are caught and re-raised in the Experiments class
* Add a test_test_repro_determinism test
* Add coverage subprocess patching (see https://pytest-cov.readthedocs.io/en/latest/subprocess-support.html)
1 parent 5b34e0c commit cb6a51e

5 files changed

Lines changed: 180 additions & 51 deletions

pyproject.toml

Lines changed: 3 additions & 1 deletion
@@ -112,6 +112,8 @@ tag_prefix = "v"
 parentdir_prefix = "model_config_tests-"
 
 [tool.coverage.run]
+patch = ["subprocess"]
 omit = [
-    "src/model_config_tests/_version.py"
+    "*/model_config_tests/_version.py",
+    "src/model_config_tests/_version.py",
 ]
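
The new patch = ["subprocess"] entry follows the pytest-cov subprocess-support guidance linked in the commit message: it asks coverage.py to also measure Python child processes. That matters here because the self-tests further down launch the model-config-tests CLI via subprocess.run. A rough sketch of that kind of invocation (the command string and output path are illustrative assumptions, not taken from this diff):

# Illustrative sketch only: the sort of child process the coverage subprocess
# patch is meant to capture. Command and paths are assumptions.
import shlex
import subprocess

test_cmd = "model-config-tests -s -m repro_determinism --output-path /tmp/test-output"
result = subprocess.run(shlex.split(test_cmd), capture_output=True, text=True)

# Without subprocess patching, lines executed inside this child process would
# not appear in the parent's coverage report.
print(result.returncode)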

src/model_config_tests/config_tests/test_bit_reproducibility.py

Lines changed: 33 additions & 26 deletions
@@ -9,7 +9,7 @@
 
 import pytest
 
-from model_config_tests.exp_test_helper import Experiments
+from model_config_tests.exp_test_helper import Experiments, ExpTestHelper
 from model_config_tests.util import DAY_IN_SECONDS, HOUR_IN_SECONDS
 
 # Names of shared experiments
@@ -147,6 +147,20 @@ def experiments(
     return _experiments(experiments_markers, output_path, control_path, keep_archive)
 
 
+@pytest.fixture
+def requested_experiments(request, experiments: Experiments):
+    """Fixture to check that requested experiments have run successfully
+    and return a dictionary of ExpTestHelper instances for each experiment."""
+    exp_marker = request.node.get_closest_marker("experiments").args[0]
+    requested_exps = {}
+    for exp_name in exp_marker:
+        # Check experiment has run successfully - this will raise an
+        # error if there are any non-zero exit codes in the outputs
+        experiments.check_experiment(exp_name)
+        requested_exps[exp_name] = experiments.get_experiment(exp_name)
+    return requested_exps
+
+
 class TestBitReproducibility:
 
     @pytest.mark.repro
@@ -160,7 +174,7 @@ def test_repro_historical(
         self,
         output_path: Path,
         control_path: Path,
-        experiments: Experiments,
+        requested_experiments: dict[str, ExpTestHelper],
         checksum_path: Optional[Path],
     ):
         """
@@ -178,9 +192,9 @@
             Path to the model configuration to test. This is copied for
             for control directories in experiments. Default is set in
             conftests.py.
-        experiments: Experiments
-            Class that manages the shared experiments. This is a fixture
-            defined in this file.
+        requested_experiments: dict[str, ExpTestHelper]
+            A dictionary of requested experiments, where the key is the
+            experiment name and the value is an instance of ExpTestHelper.
         checksum_path: Optional[Path]
             Path to checksums to compare model output against. Default is
             set to checksums saved on model configuration. This is a
@@ -190,12 +204,7 @@
         checksum_output_dir = set_checksum_output_dir(output_path=output_path)
 
         # Use default runtime experiment to get the historical checksums
-        experiments.check_experiments([EXP_DEFAULT_RUNTIME])
-        exp = experiments.get_experiment(EXP_DEFAULT_RUNTIME)
-
-        assert (
-            exp.model.output_exists()
-        ), "Output file required for model checksums does not exist"
+        exp = requested_experiments.get(EXP_DEFAULT_RUNTIME)
 
         # Set the checksum output filename using the model default runtime
         runtime_hours = exp.model.default_runtime_seconds // HOUR_IN_SECONDS
@@ -235,20 +244,16 @@
             EXP_1D_RUNTIME_REPEAT: {"n_runs": 1, "model_runtime": DAY_IN_SECONDS},
         }
     )
-    def test_repro_determinism(self, experiments: Experiments):
+    def test_repro_determinism(self, requested_experiments: dict[str, ExpTestHelper]):
         """
         Determinism test that confirms repeated model runs for 1 day
         give the same results
         """
-        experiments.check_experiments([EXP_1D_RUNTIME, EXP_1D_RUNTIME_REPEAT])
-        exp_1d_runtime = experiments.get_experiment(EXP_1D_RUNTIME)
-        exp_1d_runtime_repeat = experiments.get_experiment(EXP_1D_RUNTIME_REPEAT)
+        exp_1d_runtime = requested_experiments.get(EXP_1D_RUNTIME)
+        exp_1d_runtime_repeat = requested_experiments.get(EXP_1D_RUNTIME_REPEAT)
 
         # Compare expected to produced.
-        assert exp_1d_runtime.model.output_exists()
         expected = exp_1d_runtime.extract_checksums()
-
-        assert exp_1d_runtime_repeat.model.output_exists()
         produced = exp_1d_runtime_repeat.extract_checksums()
 
         assert produced == expected
@@ -262,16 +267,17 @@ def test_repro_determinism(self, experiments: Experiments):
             EXP_2D_RUNTIME: {"n_runs": 1, "model_runtime": 2 * DAY_IN_SECONDS},
         }
     )
-    def test_repro_restart(self, output_path: Path, experiments: Experiments):
+    def test_repro_restart(
+        self, output_path: Path, requested_experiments: dict[str, ExpTestHelper]
+    ):
         """
         Restart reproducibility test that confirms two short consecutive
         1-day model runs give the same results as a longer single 2-day model
         run.
         """
         # Get experiments with 2x1 day and 2 day runtimes
-        experiments.check_experiments([EXP_1D_RUNTIME, EXP_2D_RUNTIME])
-        exp_1d_runtime = experiments.get_experiment(EXP_1D_RUNTIME)
-        exp_2d_runtime = experiments.get_experiment(EXP_2D_RUNTIME)
+        exp_1d_runtime = requested_experiments.get(EXP_1D_RUNTIME)
+        exp_2d_runtime = requested_experiments.get(EXP_2D_RUNTIME)
 
         # Now compare the output between our two short and one long run.
         checksums_1d_0 = exp_1d_runtime.extract_checksums()
@@ -305,14 +311,15 @@ def test_repro_restart(self, output_path: Path, experiments: Experiments):
             EXP_1D_RUNTIME_REPEAT: {"n_runs": 2, "model_runtime": DAY_IN_SECONDS},
         }
     )
-    def test_repro_determinism_restart(self, experiments: Experiments):
+    def test_repro_determinism_restart(
+        self, requested_experiments: dict[str, ExpTestHelper]
+    ):
         """
         Determinism test that confirms repeated experiments with two
         consecutive 1-day model runs give the same results
         """
-        experiments.check_experiments([EXP_1D_RUNTIME, EXP_1D_RUNTIME_REPEAT])
-        exp_1d_runtime = experiments.get_experiment(EXP_1D_RUNTIME)
-        exp_1d_runtime_repeat = experiments.get_experiment(EXP_1D_RUNTIME_REPEAT)
+        exp_1d_runtime = requested_experiments.get(EXP_1D_RUNTIME)
+        exp_1d_runtime_repeat = requested_experiments.get(EXP_1D_RUNTIME_REPEAT)
 
         # Extract checksums, using the output from the second model run
         expected = exp_1d_runtime.extract_checksums(exp_1d_runtime.model.output_1)

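Moving the check_experiment() calls into the requested_experiments fixture is what turns a failed model run into a test setup error instead of a reproducibility failure. A minimal standalone sketch of that pytest behaviour, using hypothetical fixture and test names (not code from this repository):

# Illustrative sketch, not code from this repo: an exception raised in a
# fixture is reported by pytest as a setup ERROR, while an assert in the
# test body is reported as a FAILURE.
import pytest


@pytest.fixture
def prepared_experiment():
    # Stands in for check_experiment() raising on a non-zero model exit status
    raise RuntimeError("model run failed with non-zero exit status")


def test_repro(prepared_experiment):
    # Never reached: pytest marks this test as errored during setup, not failed
    assert True
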
src/model_config_tests/exp_test_helper.py

Lines changed: 42 additions & 16 deletions
@@ -136,9 +136,30 @@ def submit_payu_run(self, n_runs: int = None) -> str:
         # Change to experiment directory and run.
         os.chdir(self.control_path)
 
-        print("Running payu setup and payu sweep commands")
-        sp.run(["payu", "setup", "--lab", str(self.lab_path)], check=True)
-        sp.run(["payu", "sweep", "--lab", str(self.lab_path)], check=True)
+        print("Running payu setup")
+        result = sp.run(
+            ["payu", "setup", "--lab", str(self.lab_path)],
+            capture_output=True,
+            text=True,
+        )
+        if result.returncode != 0:
+            # Add additional error messaging for debugging
+            error_msg = (
+                "Failed to run payu setup:\n"
+                f"Return code: {result.returncode}\n"
+                f"--- stdout ---\n{result.stdout}\n"
+                f"--- stderr ---\n{result.stderr}"
+            )
+            print(error_msg)
+            raise RuntimeError(error_msg)
+
+        print("Running payu sweep")
+        sp.run(
+            ["payu", "sweep", "--lab", str(self.lab_path)],
+            capture_output=True,
+            text=True,
+            check=True,
+        )
 
         run_command = ["payu", "run", "--lab", str(self.lab_path)]
         if n_runs:
@@ -208,7 +229,7 @@ def __init__(
         self.output_path = output_path
         self.keep_archive = keep_archive
         self.experiments = {}
-        self.successful_experiments = []
+        self.experiment_errors = {}
 
     def setup_and_submit(
         self,
@@ -282,22 +303,27 @@ def wait_for_all_experiments(self, catch_errors=True) -> None:
             try:
                 exp.wait_for_payu_run()
                 print(f"Experiment {exp_name} completed successfully")
-                self.successful_experiments.append(exp_name)
             except RuntimeError as e:
+                self.experiment_errors[exp_name] = str(e)
                 if catch_errors:
-                    print(f"Error in experiment {exp_name}: {e}")
+                    print(f"Error running experiment {exp_name}: {e}")
                 else:
-                    raise e
+                    raise
 
-    def check_experiments(self, exp_names=list[str]) -> None:
+    def check_experiment(self, exp_name: str) -> None:
         """
-        Check whether given experiments names have run successfully
+        Check whether given experiment name has run successfully
         """
-        for exp_name in exp_names:
-            # TODO: Is there other useful information to display here?
-            assert (
-                exp_name in self.successful_experiments
-            ), f"There was an error running experiment: {exp_name}"
+        if exp_name in self.experiment_errors:
+            raise RuntimeError(
+                f"There was an error running experiment {exp_name}:"
+                f" {self.experiment_errors[exp_name]}"
+            )
+
+        # Double check if the required experiment output exists
+        exp = self.experiments.get(exp_name)
+        if not exp.model.output_exists():
+            raise RuntimeError(f"Experiment {exp_name} output file does not exist.")
 
 
     def setup_exp(
@@ -519,13 +545,13 @@ def wait_for_qsub_job(
     # Check whether the run job was successful
     exit_status = parse_exit_status_from_file(stdout)
    if exit_status != 0:
-        print(
+        raise RuntimeError(
+            f"Payu {job_type} job failed with exit status {exit_status}:\n"
             f"Job_ID: {job_id}\n"
             f"Output files: {output_files}\n"
             f"--- stdout ---\n{stdout}\n"
             f"--- stderr ---\n{stderr}\n"
         )
-        raise RuntimeError(f"Payu {job_type} job failed with exit status {exit_status}")
 
     return stdout, stderr, output_files
 
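The Experiments class above now records run failures in experiment_errors and only re-raises them when check_experiment is called for that experiment. A stripped-down sketch of the same record-now, raise-on-check pattern, with simplified names (not the actual class from this diff):

# Simplified sketch of deferring errors until a test requests the experiment.
class ExperimentErrors:
    def __init__(self) -> None:
        self.errors: dict[str, str] = {}

    def record(self, exp_name: str, exc: Exception) -> None:
        # Store the message so the remaining experiments can keep running
        self.errors[exp_name] = str(exc)

    def check(self, exp_name: str) -> None:
        # Raised lazily, only when a test actually requests this experiment,
        # so the error surfaces as a test setup error
        if exp_name in self.errors:
            raise RuntimeError(
                f"There was an error running experiment {exp_name}: "
                f"{self.errors[exp_name]}"
            )
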
tests/config_tests/test_test_bit_reproducibility.py

Lines changed: 30 additions & 2 deletions
@@ -213,8 +213,8 @@ def base_test_command(self):
         # Minimal test command
         test_cmd = (
             "model-config-tests -s "
-            # Use -k to select one test
-            f"-k {self.test_name} "
+            # Use -m to specify the test (each repro test has a unique marker)
+            f"-m {self.test_name.removeprefix('test_')} "
             f"--output-path {self.output_path} "
             # Keep archive flag will keep any pre-existing archive for the test
             # and disable the actual 'payu run' steps
@@ -500,3 +500,31 @@ def check_checksum(output_path, checksum_path, model_name, match=True):
         assert test_checksum.read_text() == checksum_path.read_text()
     else:
         assert test_checksum.read_text() != checksum_path.read_text()
+
+
+@pytest.mark.parametrize("fail", [False, True])
+def test_test_repro_determinism(tmp_dir, fail):
+    """Test repro determinism for some example output"""
+    test_name = "test_repro_determinism"
+
+    exp1_name = "exp_1d_runtime"
+    exp2_name = "exp_1d_runtime_repeat"
+
+    # Setup some example files for both experiments
+    exp1_helper = CommonTestHelper(test_name, exp1_name, "access", tmp_dir)
+    exp1_helper.copy_config("release-preindustrial+concentrations")
+    exp1_helper.create_mock_output("output000", modify=False)
+
+    exp2_helper = CommonTestHelper(test_name, exp2_name, "access", tmp_dir)
+    exp2_helper.create_mock_output("output000", modify=fail)
+
+    # Build test command
+    test_cmd = (
+        f"{exp1_helper.base_test_command()} "
+        f"--control-path {exp1_helper.control_path} "
+    )
+
+    # Run test in a subprocess call
+    result = subprocess.run(shlex.split(test_cmd), capture_output=True, text=True)
+    # Check test result
+    assert result.returncode == int(fail)
