Commit 5f9aa79

rayg1234 and misko authored

Consolidate turbo and turbo_umas (#1898)

Adding the `turbo_umas` mode was not a good choice and is too confusing for users. This goes back to having a single `turbo` mode and selects the appropriate acceleration backend automatically. Mentioned in #1872.

Co-authored-by: misko <misko@meta.com>

1 parent c45dbcd commit 5f9aa79

7 files changed

Lines changed: 158 additions & 176 deletions

docs/core/common_tasks/ase_calculator.md

Lines changed: 1 addition & 1 deletion
```diff
@@ -87,7 +87,7 @@ The advanced user might quickly see that **default** mode and **turbo** mode are
 | edge_chunk_size | Experimental. Used for padding edge sizes. This helps reduce re-compilations from torch compile, default to None |
 | use_quaternion_wigner | enable quaternion-based Wigner D matrix computation. If false we fall back to euler-angle based rotations. default True. |
 | base_precision_dtype | governs the main precision type of the computation, default to FP32, FP64 is also supported |
-| execution_mode | This allows manually toggling custom backends to maximize speed ups. default to "general". "umas-fast-gpu" will introduce 30-40% speedup for uma-s line of models. |
+| execution_mode | This allows manually toggling custom backends to maximize speed ups. default to "None", when set to "None", the predictor will automatically determine the best backend. For example, "umas-fast-gpu" will introduce 30-40% speedup for uma-s line of models. |
 
 For example, for an MD simulation use-case for a system of ~500 atoms, we can choose to use a custom mode like the following:
 
```
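The docs hunk ends just before the custom-mode example it references. As a hedged sketch of what such a configuration could look like under the new `None` default (the `pretrained_mlip.get_predict_unit` entry point, the model name, and the exact field values are assumptions, not part of this diff):

```python
# Hypothetical sketch: overriding the new auto-selecting default by pinning
# an execution mode explicitly. Entry-point and model names are assumptions.
from fairchem.core import FAIRChemCalculator, pretrained_mlip
from fairchem.core.units.mlip_unit import InferenceSettings

settings = InferenceSettings(
    merge_mole=True,                 # umas_fast_gpu requires merge_mole=True
    execution_mode="umas_fast_gpu",  # or leave as None to auto-select
)
predictor = pretrained_mlip.get_predict_unit(
    "uma-s-1p1", device="cuda", inference_settings=settings
)
calc = FAIRChemCalculator(predictor, task_name="omol")
```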

src/fairchem/core/models/uma/escn_md.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -976,7 +976,7 @@ def prepare_for_inference(self, data: AtomicData, settings: InferenceSettings):
         self._merged_composition = None
 
         # Validate settings against backend requirements (fail early)
-        self.backend.validate(self, settings)
+        self.backend.validate(self.lmax, self.mmax, settings)
 
         if settings.merge_mole:
             assert (
```
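This one-line change is the payoff of the new `validate` signature below: validation takes bare hyperparameters instead of the module itself, so the same check can also run on a checkpoint config before any model is built. A toy illustration of that decoupling (a hypothetical standalone function, not the library's code):

```python
# Toy illustration with a hypothetical function: taking plain ints means the
# same check works on a raw config dict or on a live model.
def check_lmax_mmax(lmax: int, mmax: int) -> None:
    if lmax != 2 or mmax != 2:
        raise ValueError("umas_fast_gpu requires lmax==2 and mmax==2")

config = {"backbone": {"lmax": 2, "mmax": 2}}
check_lmax_mmax(config["backbone"]["lmax"], config["backbone"]["mmax"])  # pre-build
# check_lmax_mmax(model.lmax, model.mmax)  # post-build, as escn_md.py now does
```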

src/fairchem/core/models/uma/nn/execution_backends.py

Lines changed: 52 additions & 20 deletions
```diff
@@ -7,6 +7,7 @@
 
 from __future__ import annotations
 
+from dataclasses import replace
 from enum import Enum
 from typing import TYPE_CHECKING
 
@@ -15,14 +16,17 @@
 from fairchem.core.models.uma.nn.unified_radial import UnifiedRadialMLP
 
 if TYPE_CHECKING:
-    from fairchem.core.units.mlip_unit.api.inference import InferenceSettings
+    from fairchem.core.units.mlip_unit.api.inference import (
+        InferenceSettings,
+    )
 
 __all__ = [
     "ExecutionMode",
     "ExecutionBackend",
     "UMASFastPytorchBackend",
     "UMASFastGPUBackend",
     "get_execution_backend",
+    "maybe_update_settings_backend",
 ]
 
 # Indices for m=0 spherical harmonic coefficients in L-major ordering (lmax=2)
@@ -58,18 +62,19 @@ class ExecutionBackend:
 
     @staticmethod
     def validate(
-        model: torch.nn.Module,
-        settings: InferenceSettings | None = None,
+        lmax: int,
+        mmax: int,
+        settings: InferenceSettings,
     ) -> None:
         """
-        Validate that model and settings are compatible with this backend.
+        Validate that model parameters and settings are compatible with this backend.
 
-        Called during model construction (settings=None) and before
-        first inference (settings provided).
+        Called before first inference.
 
         Args:
-            model: The backbone model to validate.
-            settings: Inference settings, or None at construction time.
+            lmax: Maximum degree of spherical harmonics.
+            mmax: Maximum order of spherical harmonics.
+            settings: Inference settings.
 
         Raises:
             ValueError: If incompatible with this backend.
@@ -265,17 +270,13 @@ class UMASFastPytorchBackend(ExecutionBackend):
 
     @staticmethod
     def validate(
-        model: torch.nn.Module,
-        settings: InferenceSettings | None = None,
+        lmax: int,
+        mmax: int,
+        settings: InferenceSettings,
     ) -> None:
         """
         Validate that settings are compatible with fast pytorch mode.
         """
-        # Check activation_checkpointing from model (chunk_size is None when disabled)
-        if model.edge_degree_embedding.activation_checkpoint_chunk_size is not None:
-            raise ValueError(
-                "UMASFastPytorchBackend requires activation_checkpointing=False"
-            )
         # Also reject if user tries to enable it via inference settings
         if settings is not None and settings.activation_checkpointing:
             raise ValueError(
@@ -338,15 +339,16 @@ class UMASFastGPUBackend(UMASFastPytorchBackend):
 
     @staticmethod
     def validate(
-        model: torch.nn.Module,
-        settings: InferenceSettings | None = None,
+        lmax: int,
+        mmax: int,
+        settings: InferenceSettings,
     ) -> None:
-        UMASFastPytorchBackend.validate(model, settings)
+        UMASFastPytorchBackend.validate(lmax, mmax, settings)
         if not torch.cuda.is_available():
             raise ValueError("umas_fast_gpu requires CUDA")
-        if model.lmax != 2 or model.mmax != 2:
+        if lmax != 2 or mmax != 2:
             raise ValueError("umas_fast_gpu requires lmax==2 and mmax==2")
-        if settings is not None and not settings.merge_mole:
+        if not settings.merge_mole:
             raise ValueError("umas_fast_gpu requires merge_mole=True")
 
     @staticmethod
@@ -446,3 +448,33 @@ def get_execution_backend(
         available = [m.value for m in _EXECUTION_BACKENDS]
         raise ValueError(f"Unknown execution mode: {mode}. Available: {available}")
     return _EXECUTION_BACKENDS[mode]()
+
+
+def maybe_update_settings_backend(
+    settings: InferenceSettings,
+    model_config: dict,
+) -> InferenceSettings:
+    """
+    Update inference settings to use UMAS_FAST_GPU if conditions are met.
+
+    Sets execution_mode to UMAS_FAST_GPU if:
+    - execution_mode is not already set
+    - UMASFastGPUBackend.validate passes for the model and settings
+
+    Args:
+        settings: Current inference settings.
+        model_config: The model configuration dictionary to validate.
+
+    Returns:
+        Updated inference settings with the appropriate execution mode.
+    """
+    if settings.execution_mode is not None:
+        return settings
+
+    try:
+        lmax = model_config["backbone"]["lmax"]
+        mmax = model_config["backbone"]["mmax"]
+        UMASFastGPUBackend.validate(lmax, mmax, settings)
+        return replace(settings, execution_mode=ExecutionMode.UMAS_FAST_GPU)
+    except (ValueError, KeyError):
+        return settings
```
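A minimal usage sketch of the new helper, assuming a CUDA machine and a config dict shaped like the key lookups above (the concrete values here are illustrative, not from this diff):

```python
# Illustrative only: how the predictor-side auto-selection behaves.
from fairchem.core.models.uma.nn.execution_backends import (
    maybe_update_settings_backend,
)
from fairchem.core.units.mlip_unit import InferenceSettings

settings = InferenceSettings(merge_mole=True)  # execution_mode left at None
model_config = {"backbone": {"lmax": 2, "mmax": 2}}

# If UMASFastGPUBackend.validate passes, this returns a copy (via
# dataclasses.replace) with execution_mode=ExecutionMode.UMAS_FAST_GPU; any
# ValueError/KeyError along the way (no CUDA, wrong lmax/mmax, missing keys)
# silently falls back to the unchanged settings.
settings = maybe_update_settings_backend(settings, model_config)
```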

src/fairchem/core/units/mlip_unit/api/inference.py

Lines changed: 4 additions & 17 deletions
```diff
@@ -101,10 +101,12 @@ class InferenceSettings:
     # Accepts a torch.dtype or a string in ALLOWED_DTYPES (e.g. "float32").
     base_precision_dtype: torch.dtype | str = torch.float32
 
-    # Execution backend mode for the backbone. The default is "general".
+    # Execution backend mode for the backbone.
+    # Set to "general" for the default execution mode that works across all models and hardware.
     # Set to "umas_fast_pytorch" to enable block-diagonal SO2 GEMM conversion for faster inference.
     # Set to "umas_fast_gpu" to enable highly optimized backend with triton kernels for maximum speed.
-    execution_mode: str = "general"
+    # If None, the predictor will decide the best execution mode based on the model and hardware capabilities (e.g., will choose "umas_fast_gpu" for uma-s if running on compatible Nvidia GPU).
+    execution_mode: str | None = None
 
     # New fields for untrained derivative properties
     # These flags request computation of properties NOT in the checkpoint's task list.
@@ -156,7 +158,6 @@ def inference_settings_default():
         compile=False,
         external_graph_gen=False,
         internal_graph_gen_version=2,
-        execution_mode="general",
     )
 
 
@@ -175,19 +176,6 @@ def inference_settings_turbo():
     )
 
 
-# this setting is specific for UMA-S on cuda for maximum speed.
-def inference_settings_turbo_umas():
-    return InferenceSettings(
-        tf32=True,
-        activation_checkpointing=False,
-        merge_mole=True,
-        compile=True,
-        external_graph_gen=False,
-        internal_graph_gen_version=2,
-        execution_mode="umas_fast_gpu",
-    )
-
-
 # this mode corresponds to the default settings used for training and evaluation
 def inference_settings_traineval():
     return InferenceSettings(
@@ -203,7 +191,6 @@ def inference_settings_traineval():
     "default": inference_settings_default(),
     "turbo": inference_settings_turbo(),
     "traineval": inference_settings_traineval(),
-    "turbo_umas": inference_settings_turbo_umas(),
 }
 
```
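With the `turbo_umas` entry gone from the named-settings registry, `"turbo"` is the single fast preset, and per the commit message the backend choice is deferred to the predictor. A short sketch using the factory functions named in the diff (the registry's variable name is not shown in the hunk, so the dict here is a stand-in):

```python
# Sketch: the registry now resolves only these presets; "turbo_umas" is gone.
from fairchem.core.units.mlip_unit.api.inference import (
    inference_settings_default,
    inference_settings_traineval,
    inference_settings_turbo,
)

presets = {
    "default": inference_settings_default(),
    "turbo": inference_settings_turbo(),
    "traineval": inference_settings_traineval(),
}
# With execution_mode no longer pinned in the factories, it defaults to None
# and the predictor picks the backend (e.g. umas_fast_gpu for UMA-S on CUDA).
```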

src/fairchem/core/units/mlip_unit/predict.py

Lines changed: 14 additions & 6 deletions
```diff
@@ -36,6 +36,9 @@
     setup_env_local_multi_gpu,
 )
 from fairchem.core.datasets.atomic_data import AtomicData, warn_if_upcasting
+from fairchem.core.models.uma.nn.execution_backends import (
+    maybe_update_settings_backend,
+)
 from fairchem.core.units.mlip_unit import InferenceSettings
 from fairchem.core.units.mlip_unit.mlip_unit import OutputSpec, Task
 from fairchem.core.units.mlip_unit.single_atom_patch import (
@@ -114,7 +117,7 @@ def __init__(
         self.inference_settings = inference_settings
         self._setup_threads(inference_settings)
 
-        if inference_settings.wigner_cuda:
+        if self.inference_settings.wigner_cuda:
             logging.warning(
                 "The wigner_cuda flag is deprecated and will be removed in future versions."
             )
@@ -124,16 +127,21 @@ def __init__(
             inference_model_path, map_location="cpu", weights_only=False
         )
 
+        # if the model is uma-s and the execution mode is not explicitly set, default to the optimized uma-s gpu execution mode
+        self.inference_settings = maybe_update_settings_backend(
+            self.inference_settings, checkpoint.model_config
+        )
+
         # Build model-specific overrides
         final_overrides = self._build_overrides_from_settings(
-            checkpoint, overrides, inference_settings
+            checkpoint, overrides, self.inference_settings
        )
 
         # Set default dtype during model construction so that non-persistent
         # buffers (SO3_Grid matrices, CoefficientMapping) are created at the
         # requested precision rather than being cast from float32 later.
         prev_dtype = torch.get_default_dtype()
-        torch.set_default_dtype(inference_settings.base_precision_dtype)
+        torch.set_default_dtype(self.inference_settings.base_precision_dtype)
 
         try:
             # Load model with overrides, passing pre-loaded checkpoint
@@ -151,17 +159,17 @@ def __init__(
 
         # Get backbone's default untrained tasks (if supported and enabled)
         default_backbone_tasks = []
-        if inference_settings.auto_add_default_untrained_tasks:
+        if self.inference_settings.auto_add_default_untrained_tasks:
             backbone = self.model.module.backbone
             if hasattr(backbone, "get_default_untrained_tasks"):
                 default_backbone_tasks = backbone.get_default_untrained_tasks(
                     self.model.module.tasks,
-                    inference_settings,
+                    self.inference_settings,
                 )
 
         # Create explicitly requested untrained tasks
         untrained_tasks = self._create_untrained_tasks(
-            inference_settings, self.model.module.tasks
+            self.inference_settings, self.model.module.tasks
         )
 
         explicit_task_names = {t.name for t in untrained_tasks}
```
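End to end, the backend selection is now invisible to callers of the predict unit. A hedged sketch of the resulting user flow (the `pretrained_mlip.get_predict_unit` entry point and the model name are assumptions consistent with the fairchem docs, not shown in this diff):

```python
# Hedged sketch: a user asks for "turbo"; inside the predict unit's __init__
# the settings pass through maybe_update_settings_backend, which upgrades
# execution_mode to umas_fast_gpu only when the checkpoint's lmax/mmax and
# the hardware qualify. Entry-point and model names are assumptions.
from fairchem.core import pretrained_mlip

predictor = pretrained_mlip.get_predict_unit(
    "uma-s-1p1",
    device="cuda",
    inference_settings="turbo",  # replaces the removed "turbo_umas" preset
)
```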
