Commit b2ff855
Michael Dzamba
exp35: InferenceSettings.freeze_params (skip weight-grad backward kernels)
Adds an opt-in flag that calls requires_grad_(False) on every model
parameter during predict-time _lazy_init. For gradient-force inference
(forces computed as the gradient of energy with respect to positions),
autograd then skips the weight-grad path of every Linear / segment_mm
backward, saving CUDA time and peak memory. The win is conditional: it
helps when paired with moe_layer_type=fairchem_cpp, but can regress
under tf32 + pytorch MOLE due to cuBLAS fused-(dx, dW) kernel
selection. Off by default.

1 parent 081a318 · commit b2ff855
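The idea in the commit message can be sketched with plain PyTorch. This is an illustrative toy model, not the fairchem code: freezing the weights lets autograd skip the weight-gradient (dW) half of each Linear backward, while d(energy)/d(positions) is still available for gradient-based forces.

```python
import torch

# Toy energy model standing in for the real network (names/shapes are
# illustrative assumptions, not the fairchem architecture).
model = torch.nn.Sequential(
    torch.nn.Linear(3, 16),
    torch.nn.SiLU(),
    torch.nn.Linear(16, 1),
)

# The freeze_params behavior: mark every parameter as not requiring grad,
# so the backward pass skips the weight-grad kernels entirely.
for p in model.parameters():
    p.requires_grad_(False)

# Positions still require grad: gradient-force inference differentiates
# the energy with respect to atomic positions, not the weights.
positions = torch.randn(8, 3, requires_grad=True)
energy = model(positions).sum()

# dE/dx survives even though all weights are frozen.
forces = -torch.autograd.grad(energy, positions)[0]

assert forces.shape == positions.shape
assert all(p.grad is None for p in model.parameters())
```

Note that the saving is per-layer: each frozen Linear backward still computes the input gradient (dx) needed to keep propagating toward the positions, but drops the dW matmul. The regression the message mentions is plausible when cuBLAS would otherwise fuse dx and dW into one kernel, so dropping dW does not halve the cost.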
2 files changed
Lines changed: 15 additions & 0 deletions
[Diff body not captured in this extraction — first file: 11 lines inserted after original line 74 (new lines 75–85).]
[Diff body not captured in this extraction — second file: 4 lines inserted after original line 463 (new lines 464–467).]