4 changes: 3 additions & 1 deletion pyproject.toml
@@ -112,6 +112,8 @@ tag_prefix = "v"
parentdir_prefix = "model_config_tests-"

[tool.coverage.run]
patch = ["subprocess"]
omit = [
"src/model_config_tests/_version.py"
"*/model_config_tests/_version.py",
"src/model_config_tests/_version.py",
]
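For context on the new patch = ["subprocess"] line: newer coverage.py releases can patch subprocess creation so coverage is also collected in child Python processes, which matters here because the config tests drive the CLI through subprocess.run (see the new test at the end of this diff). A minimal sketch, assuming the model-config-tests entry point is on PATH, of the kind of child invocation whose coverage would otherwise be lost:

import shlex
import subprocess

# Without subprocess patching, coverage only measures the parent test process;
# with [tool.coverage.run] patch = ["subprocess"], the child Python process
# started here is instrumented as well.
cmd = "model-config-tests -s -m repro_determinism --output-path /tmp/output"
result = subprocess.run(shlex.split(cmd), capture_output=True, text=True)
print(result.returncode)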
59 changes: 33 additions & 26 deletions src/model_config_tests/config_tests/test_bit_reproducibility.py
@@ -9,7 +9,7 @@

import pytest

from model_config_tests.exp_test_helper import Experiments
from model_config_tests.exp_test_helper import Experiments, ExpTestHelper
from model_config_tests.util import DAY_IN_SECONDS, HOUR_IN_SECONDS

# Names of shared experiments
@@ -147,6 +147,20 @@ def experiments(
return _experiments(experiments_markers, output_path, control_path, keep_archive)


@pytest.fixture
def requested_experiments(request, experiments: Experiments):
"""Fixture to check that requested experiments have run successfully
and return a dictionary of ExpTestHelper instances for each experiment."""
exp_marker = request.node.get_closest_marker("experiments").args[0]
requested_exps = {}
for exp_name in exp_marker:
# Check experiment has run successfully - this will raise an
# error if there are any non-zero exit codes in the outputs
experiments.check_experiment(exp_name)
requested_exps[exp_name] = experiments.get_experiment(exp_name)
return requested_exps
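To show how the new fixture is consumed: a test opts in with the experiments marker, and the fixture returns only those experiments, each already checked for a successful run and existing output. A minimal sketch of a consuming test (the test name is hypothetical; the marker payload is assumed to have the same shape as the tests below, and EXP_1D_RUNTIME, DAY_IN_SECONDS and ExpTestHelper are names already imported or defined in this module):

@pytest.mark.experiments(
    {
        EXP_1D_RUNTIME: {"n_runs": 1, "model_runtime": DAY_IN_SECONDS},
    }
)
def test_example_usage(requested_experiments: dict[str, ExpTestHelper]):
    # Keys of the marker dict become keys of the fixture's return value,
    # and each value is an ExpTestHelper whose run has already been validated.
    exp = requested_experiments[EXP_1D_RUNTIME]
    assert exp.extract_checksums()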


class TestBitReproducibility:

@pytest.mark.repro
@@ -160,7 +174,7 @@ def test_repro_historical(
self,
output_path: Path,
control_path: Path,
experiments: Experiments,
requested_experiments: dict[str, ExpTestHelper],
checksum_path: Optional[Path],
):
"""
@@ -178,9 +192,9 @@
Path to the model configuration to test. This is copied for
control directories in experiments. Default is set in
conftests.py.
experiments: Experiments
Class that manages the shared experiments. This is a fixture
defined in this file.
requested_experiments: dict[str, ExpTestHelper]
A dictionary of requested experiments, where the key is the
experiment name and the value is an instance of ExpTestHelper.
checksum_path: Optional[Path]
Path to checksums to compare model output against. Default is
set to checksums saved on model configuration. This is a
@@ -190,12 +204,7 @@
checksum_output_dir = set_checksum_output_dir(output_path=output_path)

# Use default runtime experiment to get the historical checksums
experiments.check_experiments([EXP_DEFAULT_RUNTIME])
exp = experiments.get_experiment(EXP_DEFAULT_RUNTIME)

assert (
exp.model.output_exists()
), "Output file required for model checksums does not exist"
exp = requested_experiments.get(EXP_DEFAULT_RUNTIME)

# Set the checksum output filename using the model default runtime
runtime_hours = exp.model.default_runtime_seconds // HOUR_IN_SECONDS
@@ -235,20 +244,16 @@ def test_repro_historical(
EXP_1D_RUNTIME_REPEAT: {"n_runs": 1, "model_runtime": DAY_IN_SECONDS},
}
)
def test_repro_determinism(self, experiments: Experiments):
def test_repro_determinism(self, requested_experiments: dict[str, ExpTestHelper]):
"""
Determinism test that confirms repeated model runs for 1 day
give the same results
"""
experiments.check_experiments([EXP_1D_RUNTIME, EXP_1D_RUNTIME_REPEAT])
exp_1d_runtime = experiments.get_experiment(EXP_1D_RUNTIME)
exp_1d_runtime_repeat = experiments.get_experiment(EXP_1D_RUNTIME_REPEAT)
exp_1d_runtime = requested_experiments.get(EXP_1D_RUNTIME)
exp_1d_runtime_repeat = requested_experiments.get(EXP_1D_RUNTIME_REPEAT)

# Compare expected to produced.
assert exp_1d_runtime.model.output_exists()
expected = exp_1d_runtime.extract_checksums()

assert exp_1d_runtime_repeat.model.output_exists()
produced = exp_1d_runtime_repeat.extract_checksums()

assert produced == expected
@@ -262,16 +267,17 @@ def test_repro_determinism(self, experiments: Experiments):
EXP_2D_RUNTIME: {"n_runs": 1, "model_runtime": 2 * DAY_IN_SECONDS},
}
)
def test_repro_restart(self, output_path: Path, experiments: Experiments):
def test_repro_restart(
self, output_path: Path, requested_experiments: dict[str, ExpTestHelper]
):
"""
Restart reproducibility test that confirms two short consecutive
1-day model runs give the same results as a longer single 2-day model
run.
"""
# Get experiments with 2x1 day and 2 day runtimes
experiments.check_experiments([EXP_1D_RUNTIME, EXP_2D_RUNTIME])
exp_1d_runtime = experiments.get_experiment(EXP_1D_RUNTIME)
exp_2d_runtime = experiments.get_experiment(EXP_2D_RUNTIME)
exp_1d_runtime = requested_experiments.get(EXP_1D_RUNTIME)
exp_2d_runtime = requested_experiments.get(EXP_2D_RUNTIME)

# Now compare the output between our two short and one long run.
checksums_1d_0 = exp_1d_runtime.extract_checksums()
@@ -305,14 +311,15 @@ def test_repro_restart(self, output_path: Path, experiments: Experiments):
EXP_1D_RUNTIME_REPEAT: {"n_runs": 2, "model_runtime": DAY_IN_SECONDS},
}
)
def test_repro_determinism_restart(self, experiments: Experiments):
def test_repro_determinism_restart(
self, requested_experiments: dict[str, ExpTestHelper]
):
"""
Determinism test that confirms repeated experiments with two
consecutive 1-day model runs give the same results
"""
experiments.check_experiments([EXP_1D_RUNTIME, EXP_1D_RUNTIME_REPEAT])
exp_1d_runtime = experiments.get_experiment(EXP_1D_RUNTIME)
exp_1d_runtime_repeat = experiments.get_experiment(EXP_1D_RUNTIME_REPEAT)
exp_1d_runtime = requested_experiments.get(EXP_1D_RUNTIME)
exp_1d_runtime_repeat = requested_experiments.get(EXP_1D_RUNTIME_REPEAT)

# Extract checksums, using the output from the second model run
expected = exp_1d_runtime.extract_checksums(exp_1d_runtime.model.output_1)
58 changes: 42 additions & 16 deletions src/model_config_tests/exp_test_helper.py
@@ -136,9 +136,30 @@ def submit_payu_run(self, n_runs: int = None) -> str:
# Change to experiment directory and run.
os.chdir(self.control_path)

print("Running payu setup and payu sweep commands")
sp.run(["payu", "setup", "--lab", str(self.lab_path)], check=True)
sp.run(["payu", "sweep", "--lab", str(self.lab_path)], check=True)
print("Running payu setup")
result = sp.run(
["payu", "setup", "--lab", str(self.lab_path)],
capture_output=True,
text=True,
)
if result.returncode != 0:
# Add additional error messaging for debugging
error_msg = (
"Failed to run payu setup:\n"
f"Return code: {result.returncode}\n"
f"--- stdout ---\n{result.stdout}\n"
f"--- stderr ---\n{result.stderr}"
)
print(error_msg)
raise RuntimeError(error_msg)

print("Running payu sweep")
sp.run(
["payu", "sweep", "--lab", str(self.lab_path)],
capture_output=True,
text=True,
check=True,
)

run_command = ["payu", "run", "--lab", str(self.lab_path)]
if n_runs:
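On the design choice above: payu setup now captures its output and raises with the full stdout/stderr, while payu sweep still relies on check=True. If the same verbose reporting were wanted for every payu command, one option is a small shared wrapper; the helper below is purely illustrative and not part of this PR:

import subprocess as sp

def run_logged(args: list[str]) -> sp.CompletedProcess:
    """Hypothetical helper: run a command, raising with full output on failure."""
    result = sp.run(args, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(
            f"Failed to run {' '.join(args)}:\n"
            f"Return code: {result.returncode}\n"
            f"--- stdout ---\n{result.stdout}\n"
            f"--- stderr ---\n{result.stderr}"
        )
    return result

# e.g. run_logged(["payu", "setup", "--lab", str(lab_path)])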
@@ -208,7 +229,7 @@ def __init__(
self.output_path = output_path
self.keep_archive = keep_archive
self.experiments = {}
self.successful_experiments = []
self.experiment_errors = {}

def setup_and_submit(
self,
@@ -282,22 +303,27 @@ def wait_for_all_experiments(self, catch_errors=True) -> None:
try:
exp.wait_for_payu_run()
print(f"Experiment {exp_name} completed successfully")
self.successful_experiments.append(exp_name)
except RuntimeError as e:
self.experiment_errors[exp_name] = str(e)
if catch_errors:
print(f"Error in experiment {exp_name}: {e}")
print(f"Error running experiment {exp_name}: {e}")
else:
raise e
raise

def check_experiments(self, exp_names=list[str]) -> None:
def check_experiment(self, exp_name: str) -> None:
"""
Check whether given experiments names have run successfully
Check whether given experiment name has run successfully
"""
for exp_name in exp_names:
# TODO: Is there other useful information to display here?
assert (
exp_name in self.successful_experiments
), f"There was an error running experiment: {exp_name}"
if exp_name in self.experiment_errors:
raise RuntimeError(
f"There was an error running experiment {exp_name}:"
f" {self.experiment_errors[exp_name]}"
)

# Double check if the required experiment output exists
exp = self.experiments.get(exp_name)
if not exp.model.output_exists():
raise RuntimeError(f"Experiment {exp_name} output file does not exist.")


def setup_exp(
@@ -519,13 +545,13 @@ def wait_for_qsub_job(
# Check whether the run job was successful
exit_status = parse_exit_status_from_file(stdout)
if exit_status != 0:
print(
raise RuntimeError(
f"Payu {job_type} job failed with exit status {exit_status}:\n"
f"Job_ID: {job_id}\n"
f"Output files: {output_files}\n"
f"--- stdout ---\n{stdout}\n"
f"--- stderr ---\n{stderr}\n"
)
raise RuntimeError(f"Payu {job_type} job failed with exit status {exit_status}")

return stdout, stderr, output_files

32 changes: 30 additions & 2 deletions tests/config_tests/test_test_bit_reproducibility.py
@@ -213,8 +213,8 @@ def base_test_command(self):
# Minimal test command
test_cmd = (
"model-config-tests -s "
# Use -k to select one test
f"-k {self.test_name} "
# Use -m to specify the test (each repro test has a unique marker)
f"-m {self.test_name.removeprefix('test_')} "
f"--output-path {self.output_path} "
# Keep archive flag will keep any pre-existing archive for the test
# and disable the actual 'payu run' steps
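To make the selection change concrete: -k matches tests by name expression, while -m selects by marker, and each repro test carries a marker named after the test without its test_ prefix (per the comment above). A small sketch of how the new flag is derived, with the test name as an assumed example:

test_name = "test_repro_determinism"  # hypothetical example value

# Old selection: match on the test name
old_flag = f"-k {test_name}"                        # -k test_repro_determinism
# New selection: the test's dedicated pytest marker
new_flag = f"-m {test_name.removeprefix('test_')}"  # -m repro_determinism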
@@ -500,3 +500,31 @@ def check_checksum(output_path, checksum_path, model_name, match=True):
assert test_checksum.read_text() == checksum_path.read_text()
else:
assert test_checksum.read_text() != checksum_path.read_text()


@pytest.mark.parametrize("fail", [False, True])
def test_test_repro_determinism(tmp_dir, fail):
"""Test repro determinism for some example output"""
test_name = "test_repro_determinism"

exp1_name = "exp_1d_runtime"
exp2_name = "exp_1d_runtime_repeat"

# Setup some example files for both experiments
exp1_helper = CommonTestHelper(test_name, exp1_name, "access", tmp_dir)
exp1_helper.copy_config("release-preindustrial+concentrations")
exp1_helper.create_mock_output("output000", modify=False)

exp2_helper = CommonTestHelper(test_name, exp2_name, "access", tmp_dir)
exp2_helper.create_mock_output("output000", modify=fail)

# Build test command
test_cmd = (
f"{exp1_helper.base_test_command()} "
f"--control-path {exp1_helper.control_path} "
)

# Run test in a subprocess call
result = subprocess.run(shlex.split(test_cmd), capture_output=True, text=True)
# Check test result
assert result.returncode == int(fail)
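The final assertion leans on pytest's exit codes: 0 when every selected test passes, 1 when at least one fails, so int(fail) is the expected return code for each parametrized case (assuming the -m expression selects only this repro test). The mapping can be checked against pytest.ExitCode:

import pytest

# The return-code assertion above maps onto these documented exit codes.
assert int(pytest.ExitCode.OK) == 0            # all selected tests passed
assert int(pytest.ExitCode.TESTS_FAILED) == 1  # at least one test failed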