Parallelize lindensity (#5007)

tulga-rdn · web-flow · commit e213f2be8e87 · 2025-04-18T11:28:58.000-07:00
* Fixes #4678 * Parallelizes the mass and charge density profile calculation class (MDAnalysis.analysis.lineardensity.LinearDensity). As density profiles are computed independently for each timestep, the current parallelization methods allow the calculation of the density profiles without any problems. * Need to initialize masses and charges in `__init__` to enable parallelization + added code comments * added testing (boilerplate fixture in testsuite/MDAnalysisTests/analysis/conftest.py, analogous to the existing ones, and a client_... fixture passed to all tests that use it in testsuite/MDAnalysisTests/analysis/test_lineardensity.py) * cleanup: removed LinearDensity.totalmass attribute: it was neither used nor documented and could contain surprising values if UpdatingAtomGroups were used * update AUTHORS * update CHANGELOG
diff --git a/package/AUTHORS b/package/AUTHORS
@@ -256,6 +256,7 @@ Chronological list of authors
   - James Rowe
   - Debasish Mohanty  
   - Abdulrahman Elbanna
+  - Tulga-Erdene Sodjargal
 
 
 External code
diff --git a/package/CHANGELOG b/package/CHANGELOG
@@ -15,7 +15,7 @@ The rules for this file:
 
 -------------------------------------------------------------------------------
 ??/??/?? IAlibay, orbeckst, BHM-Bob, TRY-ER, Abdulrahman-PROG, pbuslaev,
-         yuxuanzhuang, yuyuan871111, tanishy7777
+         yuxuanzhuang, yuyuan871111, tanishy7777, tulga-rdn
 
 
  * 2.10.0
@@ -33,6 +33,7 @@ Fixes
  * Fixes the benchmark `SimpleRmsBench` in `benchmarks/analysis/rms.py`
    by changing the way weights for RMSD are calculated, instead of
    directly passing them. (Issue #3520, PR #5006)
+
 Enhancements
  * Improve parsing of topology information from LAMMPS dump files to allow
    reading of mass, charge and element attributes. (Issue #3449, PR #4995)
@@ -45,8 +46,12 @@ Enhancements
    force the use of ChainID as the segID when reading PDB (it is helpful 
    if the segment IDs (segids) are partially missing in the PDB file). 
    (Issue #4948 #2874, PR #4965)
+ * Enables parallelization for analysis.lineardensity.LinearDensity
+   (Issue #4678, PR #5007)
 
 Changes
+ * Removed undocumented and unused attribute
+   `analysis.lineardensity.LinearDensity.totalmass` (PR #5007)
 
 Deprecations
 
diff --git a/package/MDAnalysis/analysis/lineardensity.py b/package/MDAnalysis/analysis/lineardensity.py
@@ -36,6 +36,7 @@
 from MDAnalysis.analysis.base import AnalysisBase, Results
 from MDAnalysis.units import constants
 from MDAnalysis.lib.util import deprecate
+from MDAnalysis.analysis.results import ResultsGroup
 
 
 # TODO: Remove in version 3.0.0
@@ -188,6 +189,10 @@ class LinearDensity(AnalysisBase):
           It contains the bin edges of the histrogram bins for calculated
           densities and can be used for easier plotting of histogram data.
 
+    .. versionchanged:: 2.10.0
+       *  Introduced :meth:`get_supported_backends` allowing for parallel execution
+          on :mod:`multiprocessing` and :mod:`dask` backends.
+       *  Removed undocumented and unused attribute :attr:`totalmass`.
 
     .. deprecated:: 2.2.0
        The `results` dictionary has been changed and the attributes
@@ -198,6 +203,16 @@ class LinearDensity(AnalysisBase):
        and :attr:`results.x.charge_density_stddev` instead.
     """
 
+    _analysis_algorithm_is_parallelizable = True
+
+    @classmethod
+    def get_supported_backends(cls):
+        return (
+            "serial",
+            "multiprocessing",
+            "dask",
+        )
+
     def __init__(self, select, grouping="atoms", binsize=0.25, **kwargs):
         super(LinearDensity, self).__init__(
             select.universe.trajectory, **kwargs
@@ -242,13 +257,56 @@ def __init__(self, select, grouping="atoms", binsize=0.25, **kwargs):
             for key in self.keys:
                 self.results[dim][key] = np.zeros(self.nbins)
 
-        # Variables later defined in _single_frame() method
-        self.masses = None
-        self.charges = None
-        self.totalmass = None
+        # Get masses and charges for the selection (e.g. UpdatingAtomGroup)
+        if self.grouping == "atoms":
+            self.masses = self._ags[0].masses
+            self.charges = self._ags[0].charges
+
+        elif self.grouping in ["residues", "segments", "fragments"]:
+            self.masses = self._ags[0].total_mass(compound=self.grouping)
+            self.charges = self._ags[0].total_charge(compound=self.grouping)
+
+        else:
+            raise AttributeError(
+                f"{self.grouping} is not a valid value for grouping."
+            )
+
+    @staticmethod
+    def _custom_aggregator(results):
+        # NB: the *stddev values here are not the standard deviation,
+        # but the variance. The stddev is calculated in _conclude()
+        mass_density = np.sum(
+            [entry["mass_density"] for entry in results], axis=0
+        )
+        mass_density_stddev = np.sum(
+            [entry["mass_density_stddev"] for entry in results], axis=0
+        )
+        charge_density = np.sum(
+            [entry["charge_density"] for entry in results], axis=0
+        )
+        charge_density_stddev = np.sum(
+            [entry["charge_density_stddev"] for entry in results], axis=0
+        )
+        return Results(
+            dim=results[0]["dim"],
+            slice_volume=results[0]["slice_volume"],
+            hist_bin_edges=results[0]["hist_bin_edges"],
+            mass_density=mass_density,
+            mass_density_stddev=mass_density_stddev,
+            charge_density=charge_density,
+            charge_density_stddev=charge_density_stddev,
+        )
+
+    def _get_aggregator(self):
+        return ResultsGroup(
+            lookup={
+                "x": self._custom_aggregator,
+                "y": self._custom_aggregator,
+                "z": self._custom_aggregator,
+            }
+        )
 
     def _single_frame(self):
-        # Get masses and charges for the selection
         if self.grouping == "atoms":
             self.masses = self._ags[0].masses
             self.charges = self._ags[0].charges
@@ -262,11 +320,8 @@ def _single_frame(self):
                 f"{self.grouping} is not a valid value for grouping."
             )
 
-        self.totalmass = np.sum(self.masses)
-
         self.group = getattr(self._ags[0], self.grouping)
         self._ags[0].wrap(compound=self.grouping)
-
         # Find position of atom/group of atoms
         if self.grouping == "atoms":
             positions = self._ags[0].positions  # faster for atoms
diff --git a/testsuite/MDAnalysisTests/analysis/conftest.py b/testsuite/MDAnalysisTests/analysis/conftest.py
@@ -17,6 +17,7 @@
 from MDAnalysis.analysis.nucleicacids import NucPairDist
 from MDAnalysis.analysis.contacts import Contacts
 from MDAnalysis.analysis.density import DensityAnalysis
+from MDAnalysis.analysis.lineardensity import LinearDensity
 from MDAnalysis.lib.util import is_installed
 
 
@@ -176,3 +177,11 @@ def client_Contacts(request):
 @pytest.fixture(scope="module", params=params_for_cls(DensityAnalysis))
 def client_DensityAnalysis(request):
     return request.param
+
+
+# MDAnalysis.analysis.lineardensity
+
+
+@pytest.fixture(scope="module", params=params_for_cls(LinearDensity))
+def client_LinearDensity(request):
+    return request.param
diff --git a/testsuite/MDAnalysisTests/analysis/test_lineardensity.py b/testsuite/MDAnalysisTests/analysis/test_lineardensity.py
@@ -32,15 +32,15 @@
 from MDAnalysisTests.util import no_deprecated_call
 
 
-def test_invalid_grouping():
+def test_invalid_grouping(client_LinearDensity):
     """Invalid groupings raise AttributeError"""
     universe = mda.Universe(waterPSF, waterDCD)
     sel_string = "all"
     selection = universe.select_atoms(sel_string)
     with pytest.raises(AttributeError):
         # centroid is attribute of AtomGroup, but not valid here
         ld = LinearDensity(selection, grouping="centroid", binsize=5)
-        ld.run()
+        ld.run(**client_LinearDensity)
 
 
 # test data for grouping='atoms'
@@ -163,11 +163,14 @@ def test_lineardensity(
     expected_charges,
     expected_xmass,
     expected_xcharge,
+    client_LinearDensity,
 ):
     universe = mda.Universe(waterPSF, waterDCD)
     sel_string = "all"
     selection = universe.select_atoms(sel_string)
-    ld = LinearDensity(selection, grouping, binsize=5).run()
+    ld = LinearDensity(selection, grouping, binsize=5).run(
+        **client_LinearDensity
+    )
     assert_allclose(ld.masses, expected_masses)
     assert_allclose(ld.charges, expected_charges)
     # rtol changed here due to floating point imprecision
@@ -209,11 +212,11 @@ def testing_Universe():
     return u
 
 
-def test_updating_atomgroup(testing_Universe):
+def test_updating_atomgroup(testing_Universe, client_LinearDensity):
     expected_z_pos = np.array([0.0, 0.91329641, 0.08302695, 0.0, 0.0, 0.0])
     u = testing_Universe
     selection = u.select_atoms("prop z < 3", updating=True)
-    ld = LinearDensity(selection, binsize=1).run()
+    ld = LinearDensity(selection, binsize=1).run(**client_LinearDensity)
     assert_allclose(ld.results.z.mass_density, expected_z_pos)
     # Test whether histogram bins are saved correctly.
     expected_bin_edges = np.arange(0, 7)
@@ -255,6 +258,8 @@ def test_old_name_deprecations():
 
 
 # TODO: deprecated, remove in 3.0.0
+# the parallelization here is not related to the parallelization through
+# the AnalysisBase, so it is tested only in serial
 def test_parallel_analysis(testing_Universe):
     """tests _add_other_result() method. Runs LinearDensity for all atoms of
     a universe and for two subsets, then adds the results of the two subsets
@@ -276,3 +281,21 @@ def test_parallel_analysis(testing_Universe):
     assert_allclose(
         ld1.results.x.mass_density, ld_whole.results.x.mass_density
     )
+
+
+def test_class_is_parallelizable():
+    assert (
+        mda.analysis.lineardensity.LinearDensity._analysis_algorithm_is_parallelizable
+        == True
+    )
+
+
+def test_supported_backends():
+    assert (
+        mda.analysis.lineardensity.LinearDensity.get_supported_backends()
+        == (
+            "serial",
+            "multiprocessing",
+            "dask",
+        )
+    )