autoatml
diff --git a/src/autoplex/auto/rss/flows.py b/src/autoplex/auto/rss/flows.py
Lines changed: 18 additions & 0 deletions
diff --git a/src/autoplex/auto/rss/jobs.py b/src/autoplex/auto/rss/jobs.py
Lines changed: 11 additions & 0 deletions
diff --git a/src/autoplex/data/common/jobs.py b/src/autoplex/data/common/jobs.py
Lines changed: 14 additions & 4 deletions
diff --git a/src/autoplex/fitting/common/flows.py b/src/autoplex/fitting/common/flows.py
Lines changed: 5 additions & 0 deletions
diff --git a/src/autoplex/fitting/common/jobs.py b/src/autoplex/fitting/common/jobs.py
Lines changed: 17 additions & 3 deletions
@@ -1,5 +1,6 @@
 """RSS (random structure searching) flow for exploring and learning potential energy surfaces from scratch."""
 
+import logging
 from dataclasses import dataclass, field
 
 from atomate2.forcefields.jobs import ForceFieldStaticMaker
@@ -12,6 +13,10 @@
 from autoplex.misc.castep.jobs import CastepStaticMaker
 from autoplex.settings import RssConfig
 
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
+
 
 @dataclass
 class RssMaker(Maker):
@@ -309,6 +314,19 @@ def make(self, **kwargs):
                 "'train_from_scratch' must be set in the configuration file or passed as a keyword argument!!"
             )
 
+        if config_params["disable_testing"] and config_params["test_ratio"] != 0.0:
+            logging.warning("Testing disabled. Setting test_ratio to 0.0.")
+            config_params["test_ratio"] = 0.0
+
+        if (
+            config_params["train_from_scratch"]
+            and config_params["test_ratio"] == 0.0
+            and not config_params["disable_testing"]
+        ):
+            raise ValueError(
+                "A prebuilt test set should be present if testing is enabled and `test_ratio` is set to 0."
+            )
+
         rss_flow = []
 
         if config_params["train_from_scratch"]:
 
@@ -85,6 +85,7 @@ def initial_rss(
     dft_ref_file: str = "dft_ref.extxyz",
     rss_group: str = "initial",
     test_ratio: float = 0.1,
+    disable_testing: bool = False,
     regularization: bool = False,
     retain_existing_sigma: bool = False,
     scheme: str | None = None,
@@ -171,6 +172,8 @@ def initial_rss(
     test_ratio: float
         The proportion of the test set after splitting the data.
         If None, no splitting will be performed. Default is 0.1.
+    disable_testing: bool
+        Whether to disable running the model on test data. Default is False.
     regularization: bool
         If true, apply regularization. This only works for GAP. Default is False.
     retain_existing_sigma: bool
@@ -274,6 +277,7 @@ def initial_rss(
     )
     do_data_preprocessing = preprocess_data(
         test_ratio=test_ratio,
+        disable_testing=disable_testing,
         regularization=regularization,
         retain_existing_sigma=retain_existing_sigma,
         scheme=scheme,
@@ -295,6 +299,7 @@ def initial_rss(
         apply_data_preprocessing=False,
         auto_delta=auto_delta,
         glue_xml=False,
+        disable_testing=disable_testing,
     ).make(
         isolated_atom_energies=do_data_collection.output["isolated_atom_energies"],
         database_dir=do_data_preprocessing.output,
@@ -352,6 +357,7 @@ def do_rss_iterations(
     dft_ref_file: str = "dft_ref.extxyz",
     rss_group: str = "rss",
     test_ratio: float = 0.1,
+    disable_testing: bool = False,
     regularization: bool = False,
     retain_existing_sigma: bool = False,
     scheme: str | None = None,
@@ -479,6 +485,8 @@ def do_rss_iterations(
         Group name for GAP RSS. Default is 'rss'.
     test_ratio: float
         The proportion of the test set after splitting the data. Default is 0.1.
+    disable_testing: bool
+        Whether to disable running the model on test data. Default is False.
     regularization: bool
         If true, apply regularization. This only works for GAP. Default is False.
     retain_existing_sigma: bool
@@ -674,6 +682,7 @@ def do_rss_iterations(
         )
         do_data_preprocessing = preprocess_data(
             test_ratio=test_ratio,
+            disable_testing=disable_testing,
             regularization=regularization,
             retain_existing_sigma=retain_existing_sigma,
             scheme=scheme,
@@ -695,6 +704,7 @@ def do_rss_iterations(
             apply_data_preprocessing=False,
             auto_delta=auto_delta,
             glue_xml=False,
+            disable_testing=disable_testing,
         ).make(
             database_dir=do_data_preprocessing.output,
             isolated_atom_energies=input["isolated_atom_energies"],
@@ -744,6 +754,7 @@ def do_rss_iterations(
             dft_ref_file=dft_ref_file,
             rss_group=rss_group,
             test_ratio=test_ratio,
+            disable_testing=disable_testing,
             regularization=regularization,
             retain_existing_sigma=retain_existing_sigma,
             scheme=scheme,
 
@@ -745,6 +745,7 @@ def safe_strip_hostname(value):
 def preprocess_data(
     dft_ref_dir: str,
     test_ratio: float | None = None,
+    disable_testing: bool = False,
     regularization: bool = False,
     retain_existing_sigma: bool = False,
     scheme: str = "linear-hull",
@@ -758,7 +759,7 @@ def preprocess_data(
     isolated_atom_energies: dict | None = None,
 ) -> Path:
     """
-    Preprocesse data to before fiting machine learning models.
+    Preprocess data before fitting machine learning models.
 
     This function handles tasks such as splitting the dataset,
     applying regularization, accumulating database, and filtering
@@ -771,6 +772,8 @@ def preprocess_data(
     test_ratio: float
         The proportion of the test set after splitting the data.
         If None, no splitting will be performed.
+    disable_testing: bool
+        Whether to disable running the model on test data. Default is False.
     regularization: bool
         If true, apply regularization. This only works for GAP.
     retain_existing_sigma: bool
@@ -812,14 +815,20 @@ def preprocess_data(
     )
 
     if test_ratio == 0 or test_ratio is None:
-        train_structures, test_structures = atoms, atoms
+        train_structures, test_structures = atoms, []
     else:
         train_structures, test_structures = stratified_dataset_split(
             atoms, test_ratio, energy_label
         )
 
     if pre_database_dir and os.path.exists(pre_database_dir):
-        files_to_copy = ["train.extxyz", "test.extxyz"]
+        files_to_copy = [
+            "train.extxyz",
+        ]
+        if not disable_testing:
+            files_to_copy += [
+                "test.extxyz",
+            ]
         current_working_directory = os.getcwd()
 
         for file_name in files_to_copy:
@@ -830,7 +839,8 @@ def preprocess_data(
                 print(f"File {file_name} has been copied to {destination_file_path}")
 
     write("train.extxyz", train_structures, format="extxyz", append=True)
-    write("test.extxyz", test_structures, format="extxyz", append=True)
+    if not disable_testing:
+        write("test.extxyz", test_structures, format="extxyz", append=True)
 
     if regularization:
         atoms_reg: list[Atoms] = read("train.extxyz", index=":")
 
@@ -86,6 +86,8 @@ class MLIPFitMaker(Maker):
         Determine whether to preprocess the data.
     run_fits_on_different_cluster: bool
         If true, run fits on different clusters.
+    disable_testing: bool
+        Whether to disable running the model on test data.
     """
 
     name: str = "MLpotentialFit"
@@ -110,6 +112,7 @@ class MLIPFitMaker(Maker):
     num_processes_fit: int | None = None
     apply_data_preprocessing: bool = True
     run_fits_on_different_cluster: bool = False
+    disable_testing: bool = False
 
     def make(
         self,
@@ -188,6 +191,7 @@ def make(
                 device=device,
                 species_list=species_list,
                 database_dict=data_prep_job.output["database_dict"],
+                disable_testing=self.disable_testing,
                 **fit_kwargs,
             )
             jobs.append(mlip_fit_job)
@@ -221,6 +225,7 @@ def make(
             ref_virial_name=self.ref_virial_name,
             device=device,
             species_list=species_list,
+            disable_testing=self.disable_testing,
             **fit_kwargs,
         )
 
 
@@ -38,6 +38,7 @@ def machine_learning_fit(
     database_dict: dict | None = None,
     hyperpara_opt: bool = False,
     hyperparameters: MLIP_HYPERS = MLIP_HYPERS,
+    disable_testing: bool = False,
     **fit_kwargs,
 ):
     """
@@ -84,6 +85,8 @@ def machine_learning_fit(
     run_fits_on_different_cluster: bool
         Indicates if fits are to be run on a different cluster.
         If True, the fitting data (train.extxyz, test.extxyz) is stored in the database.
+    disable_testing: bool
+        Whether to disable running the model on test data. Default is False.
     fit_kwargs: dict
         Additional keyword arguments for MLIP fitting.
     """
@@ -125,8 +128,9 @@ def machine_learning_fit(
     if mlip_type == "GAP":
         for train_name, test_name in zip(train_files, test_files):
             if (database_dir / train_name).exists() and (
-                database_dir / test_name
-            ).exists():
+                (database_dir / test_name).exists() or disable_testing
+            ):
+
                 train_test_error = gap_fitting(
                     db_dir=database_dir,
                     hyperparameters=hyperparameters.GAP,
@@ -140,6 +144,7 @@ def machine_learning_fit(
                     ref_virial_name=ref_virial_name,
                     train_name=train_name,
                     test_name=test_name,
+                    disable_testing=disable_testing,
                     fit_kwargs=fit_kwargs,
                 )
                 mlip_paths.append(train_test_error["mlip_path"])
@@ -153,6 +158,7 @@ def machine_learning_fit(
             ref_force_name=ref_force_name,
             ref_virial_name=ref_virial_name,
             num_processes_fit=num_processes_fit,
+            disable_testing=disable_testing,
             fit_kwargs=fit_kwargs,
         )
         mlip_paths.append(train_test_error["mlip_path"])
@@ -169,6 +175,7 @@ def machine_learning_fit(
             ref_virial_name=ref_virial_name,
             species_list=species_list,
             gpu_identifier_indices=gpu_identifier_indices,
+            disable_testing=disable_testing,
             fit_kwargs=fit_kwargs,
         )
 
@@ -182,6 +189,7 @@ def machine_learning_fit(
             ref_energy_name=ref_energy_name,
             ref_force_name=ref_force_name,
             ref_virial_name=ref_virial_name,
+            disable_testing=disable_testing,
             fit_kwargs=fit_kwargs,
             device=device,
         )
@@ -194,6 +202,7 @@ def machine_learning_fit(
             ref_energy_name=ref_energy_name,
             ref_force_name=ref_force_name,
             ref_virial_name=ref_virial_name,
+            disable_testing=disable_testing,
             fit_kwargs=fit_kwargs,
             device=device,
         )
@@ -211,7 +220,12 @@ def machine_learning_fit(
         )
         mlip_paths.append(train_test_error["mlip_path"])
 
-    check_conv = check_convergence(train_test_error["test_error"])
+    error = (
+        train_test_error["train_error"]
+        if disable_testing
+        else train_test_error["test_error"]
+    )
+    check_conv = check_convergence(error)
 
     return {
         "mlip_path": mlip_paths,