Merge branch 'main' into formation-energy-calculator

lbluque · web-flow · commit 6ba77f59e7c6 · 2025-11-20T15:51:02.000-08:00
diff --git a/.github/release-drafter-data-omc.yml b/.github/release-drafter-data-omc.yml
@@ -0,0 +1,37 @@
+# Release Drafter configuration for the fairchem-data-omc package.
+# Only pull requests touching these paths are included in this package's notes.
+include-paths:
+  - src/fairchem/data/omc
+  - packages/fairchem-data-omc
+# Tags and release names both take the form fairchem_data_omc-<version>.
+tag-prefix: fairchem_data_omc
+tag-template: 'fairchem_data_omc-$RESOLVED_VERSION'
+name-template: 'fairchem_data_omc-$RESOLVED_VERSION'
+exclude-contributors: [github-actions]
+# Release-note sections, keyed by pull request label.
+categories:
+  - title: New Features / Enhancements
+    labels: [enhancement]
+  - title: Bug Fixes
+    labels: [bug]
+  - title: Documentation
+    labels: [documentation]
+  - title: Tests
+    labels: [test]
+  - title: Deprecations
+    labels: [deprecation]
+  - title: Dependencies
+    labels: [dependencies]
+  # NOTE(review): intended as a catch-all section; confirm that Release Drafter
+  # treats "*" as a wildcard rather than as a literal label name.
+  - title: Other Changes
+    labels: ["*"]
+# The next version is derived from major/minor/patch labels; pull requests
+# without any of them fall back to a patch bump.
+version-resolver:
+  major:
+    labels:
+      - 'major'
+  minor:
+    labels:
+      - 'minor'
+  patch:
+    labels:
+      - 'patch'
+  default: patch
+# Body of the drafted release; $CHANGES is substituted by Release Drafter.
+template: |
+  ## What’s Changed
+
+  $CHANGES
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -26,7 +26,7 @@ jobs:
       - name: Build
         run: |
           # add packages that are supposed to be built to this list
-          for package in fairchem-core fairchem-data-oc fairchem-demo-ocpapi fairchem-applications-cattsunami fairchem-data-omol fairchem-data-omat fairchem-lammps
+          for package in fairchem-core fairchem-data-oc fairchem-demo-ocpapi fairchem-applications-cattsunami fairchem-data-omol fairchem-data-omat fairchem-data-omc fairchem-lammps
           do
             pushd packages/$package
             hatch build
@@ -68,5 +68,11 @@ jobs:
       - name: Upload omat artifact
         uses: actions/upload-artifact@v5
         with:
-          name: dist-omat
-          path: dist-omat/*
+          name: dist-data-omat
+          path: dist-data-omat/*
+
+      - name: Upload omc artifact
+        uses: actions/upload-artifact@v5
+        with:
+          name: dist-data-omc
+          path: dist-data-omc/*
diff --git a/.github/workflows/release-drafter-data-omc.yml b/.github/workflows/release-drafter-data-omc.yml
@@ -0,0 +1,28 @@
+# Keeps the draft GitHub release for fairchem-data-omc up to date.
+name: Release Drafter - fairchem-data-omc
+
+# Runs on pushes to main that touch the omc sources or packaging, and on
+# manual dispatch.
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - 'src/fairchem/data/omc/**'
+      - 'packages/fairchem-data-omc/**'
+  workflow_dispatch:
+
+# Workflow-wide default is read-only; the job below widens only what it needs.
+permissions:
+  contents: read
+
+jobs:
+  update_release_draft:
+    permissions:
+        # write permission is required to create a github release
+        contents: write
+        pull-requests: read
+    runs-on: ubuntu-latest
+    steps:
+      - uses: release-drafter/release-drafter@v6
+        with:
+          disable-autolabeler: true
+          # Name of the package-specific config file added under .github/
+          config-name: release-drafter-data-omc.yml
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -178,3 +178,53 @@ jobs:
           packages-dir: dist-lammps/
           skip-existing: true
           verbose: true
+
+  # Publishes the fairchem-data-omat distribution to PyPI from the artifact
+  # produced by the build job.
+  release-data-omat:
+    needs: [ build ]
+    runs-on: ubuntu-latest
+    # Runs for a manual dispatch with release-pypi == 'true' on a ref named
+    # fairchem_data_omat-*, or for a release whose tag starts with that prefix.
+    if: |
+      ( github.event.inputs.release-pypi == 'true' && startsWith(github.ref_name, 'fairchem_data_omat-') ) || startsWith(github.event.release.tag_name, 'fairchem_data_omat-')
+
+    environment:
+      name: pypi
+      url: https://pypi.org/p/fairchem-data-omat/
+
+    # NOTE(review): presumably needed for PyPI trusted publishing (OIDC), since
+    # no token is passed to the publish step; confirm.
+    permissions:
+      id-token: write
+
+    steps:
+      # Fetch the artifact named dist-data-omat into a directory of the same name.
+      - uses: actions/download-artifact@v6
+        with:
+          name: dist-data-omat
+          path: dist-data-omat
+
+      - uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          verbose: true
+          packages-dir: dist-data-omat/
+          skip-existing: true
+
+  # Publishes the fairchem-data-omc distribution to PyPI from the artifact
+  # produced by the build job.
+  release-data-omc:
+    needs: [ build ]
+    runs-on: ubuntu-latest
+    # Runs for a manual dispatch with release-pypi == 'true' on a ref named
+    # fairchem_data_omc-*, or for a release whose tag starts with that prefix.
+    if: |
+      ( github.event.inputs.release-pypi == 'true' && startsWith(github.ref_name, 'fairchem_data_omc-') ) || startsWith(github.event.release.tag_name, 'fairchem_data_omc-')
+
+    environment:
+      name: pypi
+      url: https://pypi.org/p/fairchem-data-omc/
+
+    # NOTE(review): presumably needed for PyPI trusted publishing (OIDC), since
+    # no token is passed to the publish step; confirm.
+    permissions:
+      id-token: write
+
+    steps:
+      # Fetch the artifact named dist-data-omc into a directory of the same name.
+      - uses: actions/download-artifact@v6
+        with:
+          name: dist-data-omc
+          path: dist-data-omc
+
+      - uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          verbose: true
+          packages-dir: dist-data-omc/
+          skip-existing: true
diff --git a/docs/_toc.yml b/docs/_toc.yml
@@ -51,10 +51,12 @@ parts:
   - file: catalysts/datasets/summary
     sections:
       - file: catalysts/datasets/oc20
+      - file: catalysts/datasets/oc20_mads
       - file: catalysts/datasets/oc22
       - file: catalysts/datasets/oc20dense
       - file: catalysts/datasets/oc20neb
       - file: catalysts/datasets/ocx24
+      - file: catalysts/datasets/oc25
   - file: catalysts/models
   - file: catalysts/examples_tutorials/summary
     sections:
diff --git a/docs/catalysts/datasets/oc20_mads.md b/docs/catalysts/datasets/oc20_mads.md
@@ -0,0 +1,21 @@
+
+# Open Catalyst 2020 Multi-Adsorbate (mAds) Dataset
+
+## Overview
+The OC20-mAds dataset is a training set that expands the original OC20 dataset to include multi-adsorbate and coverage effects on catalyst surfaces. Adsorbates are randomly sampled from the list of OC20 adsorbates, up to a maximum of 5 adsorbates. For a small fraction of the dataset, all adsorbates on the surface may be identical. OC20-mAds is introduced in the [UMA paper](https://arxiv.org/pdf/2506.23971).
+
+## File Contents and Download
+|Splits |Size | MD5 checksum (download link)   |
+|---     |---    |---     |
+|Train   |   21,804,758   | [6435960ba5ad1a7c949bd2f2b51825bc](https://dl.fbaipublicfiles.com/opencatalystproject/data/oc20mAds/oc20_multiads_train.tar.gz)   |
+
+The following metadata can be accessed in the respective `atoms.info` entry:
+
+- `bulk_id`: Bulk identifier
+- `millers`: 3-tuple of integers indicating the Miller indices of the surface.
+- `shift`: C-direction shift used to determine cutoff for the surface (the c-direction follows the nomenclature from Pymatgen).
+- `top`: Boolean indicating whether the chosen surface was at the top or bottom of the originally enumerated surface.
+- `adsorbates`: List of adsorbates sampled and their respective placements.
+- `sid`: Unique system identifier.
+- `fid`: Frame index along the relaxation/AIMD trajectory.
+- `results_path`: Internal results location.
+- `fmax`: Max per-atom force.
diff --git a/src/fairchem/core/launchers/cluster/ray_cluster.py b/src/fairchem/core/launchers/cluster/ray_cluster.py
@@ -326,6 +326,7 @@ def __init__(
     def start_head(
         self,
         requirements: dict[str, int | str],
+        name: str = "default",
         executor: str = "slurm",
         payload: Optional[Callable[..., PayloadReturnT]] = None,
         **kwargs,
@@ -341,7 +342,7 @@ def start_head(
             cluster=executor,
         )
         s_executor.update_parameters(
-            name=f"ray_head_{self.state.cluster_id}",  # TODO name should probably include more details (cluster_id)
+            name=f"ray_head_{name}_{self.state.cluster_id}",  # TODO name should probably include more details (cluster_id)
             **requirements,
         )
         head_job = s_executor.submit(
@@ -360,6 +361,7 @@ def start_workers(
         self,
         num_workers: int,
         requirements: dict[str, int | str],
+        name: str = "default",
         executor: str = "slurm",
     ) -> list[str]:
         """
@@ -370,7 +372,7 @@ def start_workers(
         # start the workers
         s_executor = submitit.AutoExecutor(folder=str(self.log_dir), cluster=executor)
         s_executor.update_parameters(
-            name=f"ray_worker_{self.num_worker_groups}_{self.state.cluster_id}",  # TODO name should probably include more details (cluster_id)
+            name=f"ray_worker_{name}_{self.num_worker_groups}_{self.state.cluster_id}",  # TODO name should probably include more details (cluster_id)
             **requirements,
         )
 
diff --git a/src/fairchem/core/launchers/ray_on_slurm_launch.py b/src/fairchem/core/launchers/ray_on_slurm_launch.py
@@ -202,6 +202,7 @@ def ray_on_slurm_launch(config: DictConfig, log_dir: str):
 
     all_job_ids = []
     head_job_id = cluster.start_head(
+        name=config.job.run_name,
         requirements=cluster_reqs
         | {
             "nodes": 1,
@@ -220,6 +221,7 @@ def ray_on_slurm_launch(config: DictConfig, log_dir: str):
     if worker_nodes > 0:
         worker_ids = cluster.start_workers(
             1,
+            name=config.job.run_name,
             requirements=cluster_reqs
             | {
                 "nodes": worker_nodes,
diff --git a/src/fairchem/core/models/uma/escn_md.py b/src/fairchem/core/models/uma/escn_md.py
@@ -48,7 +48,51 @@
     from fairchem.core.datasets.atomic_data import AtomicData
 
 
-ESCNMD_DEFAULT_EDGE_CHUNK_SIZE = 1024 * 128
+ESCNMD_DEFAULT_EDGE_ACTIVATION_CHECKPOINT_CHUNK_SIZE = 1024 * 128
+
+
+def add_n_empty_edges(graph_dict: dict, edges_to_add: int, cutoff: float):
+    """
+    Prepend ``edges_to_add`` synthetic padding edges to ``graph_dict`` in place.
+
+    Every padding edge has both endpoints set to ``graph_dict["node_offset"]``
+    and a distance vector whose three components all equal ``1 + cutoff``, so
+    for a non-negative ``cutoff`` its norm is larger than ``cutoff``.
+
+    Args:
+        graph_dict: graph dictionary; "node_offset" is read, and "edge_index",
+            "edge_distance_vec" and "edge_distance" are replaced with padded
+            tensors.
+        edges_to_add: number of padding edges placed ahead of the existing ones.
+        cutoff: value added to one to form each distance-vector component.
+
+    Returns:
+        None. ``graph_dict`` is modified in place.
+    """
+    # Padding columns come first; concatenating along dim=1 requires the
+    # existing edge_index to have two rows.
+    graph_dict["edge_index"] = torch.cat(
+        (
+            graph_dict["edge_index"].new_ones(2, edges_to_add)
+            * graph_dict["node_offset"],
+            graph_dict["edge_index"],
+        ),
+        dim=1,
+    )
+
+    # A single (1, 3) row with every component equal to 1 + cutoff, shared by
+    # all padding edges through expand().
+    self_edge_distance_vec = graph_dict["edge_distance_vec"].new_ones(1, 3) + cutoff
+    graph_dict["edge_distance_vec"] = torch.cat(
+        (
+            self_edge_distance_vec.expand(edges_to_add, 3),
+            graph_dict["edge_distance_vec"],
+        ),
+        dim=0,
+    )
+
+    # Length of the padding vector, prepended in the same order as above so the
+    # three edge tensors stay aligned.
+    edge_distance = torch.linalg.norm(self_edge_distance_vec, dim=-1, keepdim=False)
+    graph_dict["edge_distance"] = torch.cat(
+        (edge_distance.expand(edges_to_add), graph_dict["edge_distance"]), dim=0
+    )
+
+
+@torch.compiler.disable
+def pad_edges(graph_dict, edge_chunk_size: int, cutoff: float):
+    """
+    Pad ``graph_dict`` in place so that it holds at least one edge and, when
+    ``edge_chunk_size`` is positive, a rounded-up multiple of it.
+
+    Args:
+        graph_dict: graph dictionary; the current edge count is taken from
+            ``graph_dict["edge_index"].shape[1]``.
+        edge_chunk_size: target multiple for the edge count; values <= 0 skip
+            the rounding step.
+        cutoff: forwarded to ``add_n_empty_edges`` to build the padding edges.
+
+    Returns:
+        None. ``graph_dict`` is modified in place when padding is needed.
+    """
+    n_edges = n_edges_post = graph_dict["edge_index"].shape[1]
+
+    if edge_chunk_size > 0 and n_edges_post % edge_chunk_size != 0:
+        # round the edge count up to the next multiple of edge_chunk_size
+        n_edges_post += edge_chunk_size - n_edges_post % edge_chunk_size
+
+    # NOTE(review): with zero edges the rounding above is a no-op (0 % k == 0),
+    # so the graph ends up with exactly one padding edge rather than a full
+    # chunk; confirm that this is the intended behavior.
+    n_edges_post = max(n_edges_post, 1)  # at least 1 edge to avoid empty "edge" case
+    if n_edges_post > n_edges:
+        # We append synthetic padding edges whose distance vector has norm > cutoff
+        # (see add_n_empty_edges where distance_vec is set to 1+cutoff). The radial
+        # polynomial envelope returns 0 for distances >= cutoff, so these edges never
+        # contribute to embeddings or message passing; they only ensure the edge count
+        # is a multiple of edge_chunk_size (or at least one edge), aiding chunked
+        # activation checkpointing and avoiding empty tensor edge cases.
+        add_n_empty_edges(graph_dict, n_edges_post - n_edges, cutoff)
 
 
 @registry.register_model("escnmd_backbone")
@@ -88,6 +132,7 @@ def __init__(
         use_cuda_graph_wigner: bool = False,
         radius_pbc_version: int = 1,
         always_use_pbc: bool = True,
+        edge_chunk_size: int | None = None,
     ) -> None:
         super().__init__()
         self.max_num_elements = max_num_elements
@@ -116,7 +161,10 @@ def __init__(
         activation_checkpoint_chunk_size = None
         if activation_checkpointing:
             # The size of edge blocks to use in activation checkpointing
-            activation_checkpoint_chunk_size = ESCNMD_DEFAULT_EDGE_CHUNK_SIZE
+            activation_checkpoint_chunk_size = (
+                ESCNMD_DEFAULT_EDGE_ACTIVATION_CHECKPOINT_CHUNK_SIZE
+            )
+        self.edge_chunk_size = edge_chunk_size
 
         # related to charge spin dataset system embedding
         self.chg_spin_emb_type = chg_spin_emb_type
@@ -401,6 +449,9 @@ def _generate_graph(self, data_dict):
             ]
             data_dict["batch"] = data_dict["batch_full"][graph_dict["node_partition"]]
 
+        if self.edge_chunk_size is not None:
+            pad_edges(graph_dict, self.edge_chunk_size, self.cutoff)
+
         return graph_dict
 
     @conditional_grad(torch.enable_grad())
@@ -533,7 +584,6 @@ def _init_gp_partitions(self, graph_dict, atomic_numbers_full):
             torch.arange(len(atomic_numbers_full)).to(atomic_numbers_full.device),
             gp_utils.get_gp_world_size(),
         )[gp_utils.get_gp_rank()]
-
         assert (
             node_partition.numel() > 0
         ), "Looks like there is no atoms in this graph paralell partition. Cannot proceed"
@@ -551,7 +601,6 @@ def _init_gp_partitions(self, graph_dict, atomic_numbers_full):
         graph_dict["edge_distance_vec"] = graph_dict["edge_distance_vec"][
             edge_partition
         ]
-
         return graph_dict
 
     @property
diff --git a/src/fairchem/core/units/mlip_unit/api/inference.py b/src/fairchem/core/units/mlip_unit/api/inference.py
@@ -87,6 +87,8 @@ class InferenceSettings:
     # Number of internal torch threads to use for inference
     torch_num_threads: int | None = None
 
+    edge_chunk_size: int | None = None
+
 
 # this is most general setting that works for most systems and models,
 # not optimized for speed
diff --git a/src/fairchem/core/units/mlip_unit/predict.py b/src/fairchem/core/units/mlip_unit/predict.py
@@ -134,6 +134,10 @@ def __init__(
             overrides["backbone"]["activation_checkpointing"] = (
                 inference_settings.activation_checkpointing
             )
+        if inference_settings.edge_chunk_size is not None:
+            overrides["backbone"]["edge_chunk_size"] = (
+                inference_settings.edge_chunk_size
+            )
         if inference_settings.external_graph_gen is not None:
             overrides["backbone"][
                 "otf_graph"
@@ -248,12 +252,6 @@ def predict(
                 self.model = torch.compile(self.model, dynamic=True)
             self.lazy_model_intialized = True
 
-        if self.inference_mode.external_graph_gen and data.edge_index.shape[1] == 0:
-            raise ValueError(
-                "Cannot run inference with external graph generation on empty edge index. "
-                "Please ensure the input data has valid edges."
-            )
-
         # this needs to be .clone() to avoid issues with graph parallel modifying this data with MOLE
         data_device = data.to(self.device).clone()
 
@@ -479,6 +477,7 @@ def __init__(
         if not ray.is_initialized():
             ray.init(
                 logging_level=logging.INFO,
+                num_cpus=num_workers_per_node,
                 # runtime_env={
                 #     "env_vars": {"RAY_DEBUG": "1"},
                 # },
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -10,11 +10,10 @@
 
 import random
 from contextlib import suppress
-
+import ray
 import numpy as np
 import pytest
 import torch
-
 import fairchem.core.common.gp_utils as gp_utils
 from fairchem.core.common import distutils
 
@@ -140,6 +139,12 @@ def water_xyz_file(tmp_path_factory):
 
 @pytest.fixture(autouse=True)
 def setup_before_each_test():
+    ray.shutdown()
+    if gp_utils.initialized():
+        gp_utils.cleanup_gp()
+    distutils.cleanup()
+    yield
+    ray.shutdown()
     if gp_utils.initialized():
         gp_utils.cleanup_gp()
     distutils.cleanup()
diff --git a/tests/core/units/mlip_unit/test_inference_modes.py b/tests/core/units/mlip_unit/test_inference_modes.py
diff --git a/tests/core/units/mlip_unit/test_mlip_unit.py b/tests/core/units/mlip_unit/test_mlip_unit.py
diff --git a/tests/core/units/mlip_unit/test_predict.py b/tests/core/units/mlip_unit/test_predict.py