Skip to content

Commit e213f2b

Browse files
authored
Parallelize lindensity (#5007)
* Fixes #4678 * Parallelizes the mass and charge density profile calculation class (MDAnalysis.analysis.lineardensity.LinearDensity). As density profiles are computed independently for each timestep, the current parallelization methods allow the calculation of the density profiles without any problems. * Need to initialize masses and charges in `__init__` to enable parallelization + added code comments * added testing (boilerplate fixture to testsuite/analysis/conftest.py, analogous with existing ones and a client_... fixture to all tests using in testsuite/MDAnalysisTests/analysis/test_lineardensity.py) * cleanup: removed LinearDensity.totalmass attribute: was neither used nor documented and could contain suprising values if UpdatingAtomGroups were used * update AUTHOS * update CHANGELOG
1 parent e64755c commit e213f2b

File tree

5 files changed

+107
-14
lines changed

5 files changed

+107
-14
lines changed

package/AUTHORS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,7 @@ Chronological list of authors
256256
- James Rowe
257257
- Debasish Mohanty
258258
- Abdulrahman Elbanna
259+
- Tulga-Erdene Sodjargal
259260

260261

261262
External code

package/CHANGELOG

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ The rules for this file:
1515

1616
-------------------------------------------------------------------------------
1717
??/??/?? IAlibay, orbeckst, BHM-Bob, TRY-ER, Abdulrahman-PROG, pbuslaev,
18-
yuxuanzhuang, yuyuan871111, tanishy7777
18+
yuxuanzhuang, yuyuan871111, tanishy7777, tulga-rdn
1919

2020

2121
* 2.10.0
@@ -33,6 +33,7 @@ Fixes
3333
* Fixes the benchmark `SimpleRmsBench` in `benchmarks/analysis/rms.py`
3434
by changing the way weights for RMSD are calculated, instead of
3535
directly passing them. (Issue #3520, PR #5006)
36+
3637
Enhancements
3738
* Improve parsing of topology information from LAMMPS dump files to allow
3839
reading of mass, charge and element attributes. (Issue #3449, PR #4995)
@@ -45,8 +46,12 @@ Enhancements
4546
force the use of ChainID as the segID when reading PDB (it is helpful
4647
if the segment IDs (segids) are partially missing in the PDB file).
4748
(Issue #4948 #2874, PR #4965)
49+
* Enables parallelization for analysis.lineardensity.LinearDensity
50+
(Issue #4678, PR #5007)
4851

4952
Changes
53+
* Removed undocumented and unused attribute
54+
`analysis.lineardensity.LinearDensity.totalmass` (PR #5007)
5055

5156
Deprecations
5257

package/MDAnalysis/analysis/lineardensity.py

Lines changed: 63 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
from MDAnalysis.analysis.base import AnalysisBase, Results
3737
from MDAnalysis.units import constants
3838
from MDAnalysis.lib.util import deprecate
39+
from MDAnalysis.analysis.results import ResultsGroup
3940

4041

4142
# TODO: Remove in version 3.0.0
@@ -188,6 +189,10 @@ class LinearDensity(AnalysisBase):
188189
It contains the bin edges of the histrogram bins for calculated
189190
densities and can be used for easier plotting of histogram data.
190191
192+
.. versionchanged:: 2.10.0
193+
* Introduced :meth:`get_supported_backends` allowing for parallel execution
194+
on :mod:`multiprocessing` and :mod:`dask` backends.
195+
* Removed undocumented and unused attribute :attr:`totalmass`.
191196
192197
.. deprecated:: 2.2.0
193198
The `results` dictionary has been changed and the attributes
@@ -198,6 +203,16 @@ class LinearDensity(AnalysisBase):
198203
and :attr:`results.x.charge_density_stddev` instead.
199204
"""
200205

206+
_analysis_algorithm_is_parallelizable = True
207+
208+
@classmethod
209+
def get_supported_backends(cls):
210+
return (
211+
"serial",
212+
"multiprocessing",
213+
"dask",
214+
)
215+
201216
def __init__(self, select, grouping="atoms", binsize=0.25, **kwargs):
202217
super(LinearDensity, self).__init__(
203218
select.universe.trajectory, **kwargs
@@ -242,13 +257,56 @@ def __init__(self, select, grouping="atoms", binsize=0.25, **kwargs):
242257
for key in self.keys:
243258
self.results[dim][key] = np.zeros(self.nbins)
244259

245-
# Variables later defined in _single_frame() method
246-
self.masses = None
247-
self.charges = None
248-
self.totalmass = None
260+
# Get masses and charges for the selection (e.g. UpdatingAtomGroup)
261+
if self.grouping == "atoms":
262+
self.masses = self._ags[0].masses
263+
self.charges = self._ags[0].charges
264+
265+
elif self.grouping in ["residues", "segments", "fragments"]:
266+
self.masses = self._ags[0].total_mass(compound=self.grouping)
267+
self.charges = self._ags[0].total_charge(compound=self.grouping)
268+
269+
else:
270+
raise AttributeError(
271+
f"{self.grouping} is not a valid value for grouping."
272+
)
273+
274+
@staticmethod
275+
def _custom_aggregator(results):
276+
# NB: the *stddev values here are not the standard deviation,
277+
# but the variance. The stddev is calculated in _conclude()
278+
mass_density = np.sum(
279+
[entry["mass_density"] for entry in results], axis=0
280+
)
281+
mass_density_stddev = np.sum(
282+
[entry["mass_density_stddev"] for entry in results], axis=0
283+
)
284+
charge_density = np.sum(
285+
[entry["charge_density"] for entry in results], axis=0
286+
)
287+
charge_density_stddev = np.sum(
288+
[entry["charge_density_stddev"] for entry in results], axis=0
289+
)
290+
return Results(
291+
dim=results[0]["dim"],
292+
slice_volume=results[0]["slice_volume"],
293+
hist_bin_edges=results[0]["hist_bin_edges"],
294+
mass_density=mass_density,
295+
mass_density_stddev=mass_density_stddev,
296+
charge_density=charge_density,
297+
charge_density_stddev=charge_density_stddev,
298+
)
299+
300+
def _get_aggregator(self):
301+
return ResultsGroup(
302+
lookup={
303+
"x": self._custom_aggregator,
304+
"y": self._custom_aggregator,
305+
"z": self._custom_aggregator,
306+
}
307+
)
249308

250309
def _single_frame(self):
251-
# Get masses and charges for the selection
252310
if self.grouping == "atoms":
253311
self.masses = self._ags[0].masses
254312
self.charges = self._ags[0].charges
@@ -262,11 +320,8 @@ def _single_frame(self):
262320
f"{self.grouping} is not a valid value for grouping."
263321
)
264322

265-
self.totalmass = np.sum(self.masses)
266-
267323
self.group = getattr(self._ags[0], self.grouping)
268324
self._ags[0].wrap(compound=self.grouping)
269-
270325
# Find position of atom/group of atoms
271326
if self.grouping == "atoms":
272327
positions = self._ags[0].positions # faster for atoms

testsuite/MDAnalysisTests/analysis/conftest.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from MDAnalysis.analysis.nucleicacids import NucPairDist
1818
from MDAnalysis.analysis.contacts import Contacts
1919
from MDAnalysis.analysis.density import DensityAnalysis
20+
from MDAnalysis.analysis.lineardensity import LinearDensity
2021
from MDAnalysis.lib.util import is_installed
2122

2223

@@ -176,3 +177,11 @@ def client_Contacts(request):
176177
@pytest.fixture(scope="module", params=params_for_cls(DensityAnalysis))
177178
def client_DensityAnalysis(request):
178179
return request.param
180+
181+
182+
# MDAnalysis.analysis.lineardensity
183+
184+
185+
@pytest.fixture(scope="module", params=params_for_cls(LinearDensity))
186+
def client_LinearDensity(request):
187+
return request.param

testsuite/MDAnalysisTests/analysis/test_lineardensity.py

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,15 +32,15 @@
3232
from MDAnalysisTests.util import no_deprecated_call
3333

3434

35-
def test_invalid_grouping():
35+
def test_invalid_grouping(client_LinearDensity):
3636
"""Invalid groupings raise AttributeError"""
3737
universe = mda.Universe(waterPSF, waterDCD)
3838
sel_string = "all"
3939
selection = universe.select_atoms(sel_string)
4040
with pytest.raises(AttributeError):
4141
# centroid is attribute of AtomGroup, but not valid here
4242
ld = LinearDensity(selection, grouping="centroid", binsize=5)
43-
ld.run()
43+
ld.run(**client_LinearDensity)
4444

4545

4646
# test data for grouping='atoms'
@@ -163,11 +163,14 @@ def test_lineardensity(
163163
expected_charges,
164164
expected_xmass,
165165
expected_xcharge,
166+
client_LinearDensity,
166167
):
167168
universe = mda.Universe(waterPSF, waterDCD)
168169
sel_string = "all"
169170
selection = universe.select_atoms(sel_string)
170-
ld = LinearDensity(selection, grouping, binsize=5).run()
171+
ld = LinearDensity(selection, grouping, binsize=5).run(
172+
**client_LinearDensity
173+
)
171174
assert_allclose(ld.masses, expected_masses)
172175
assert_allclose(ld.charges, expected_charges)
173176
# rtol changed here due to floating point imprecision
@@ -209,11 +212,11 @@ def testing_Universe():
209212
return u
210213

211214

212-
def test_updating_atomgroup(testing_Universe):
215+
def test_updating_atomgroup(testing_Universe, client_LinearDensity):
213216
expected_z_pos = np.array([0.0, 0.91329641, 0.08302695, 0.0, 0.0, 0.0])
214217
u = testing_Universe
215218
selection = u.select_atoms("prop z < 3", updating=True)
216-
ld = LinearDensity(selection, binsize=1).run()
219+
ld = LinearDensity(selection, binsize=1).run(**client_LinearDensity)
217220
assert_allclose(ld.results.z.mass_density, expected_z_pos)
218221
# Test whether histogram bins are saved correctly.
219222
expected_bin_edges = np.arange(0, 7)
@@ -255,6 +258,8 @@ def test_old_name_deprecations():
255258

256259

257260
# TODO: deprecated, remove in 3.0.0
261+
# the parallelization here is not related to the parallelization through
262+
# the AnalysisBase, so it is tested only in serial
258263
def test_parallel_analysis(testing_Universe):
259264
"""tests _add_other_result() method. Runs LinearDensity for all atoms of
260265
a universe and for two subsets, then adds the results of the two subsets
@@ -276,3 +281,21 @@ def test_parallel_analysis(testing_Universe):
276281
assert_allclose(
277282
ld1.results.x.mass_density, ld_whole.results.x.mass_density
278283
)
284+
285+
286+
def test_class_is_parallelizable():
287+
assert (
288+
mda.analysis.lineardensity.LinearDensity._analysis_algorithm_is_parallelizable
289+
== True
290+
)
291+
292+
293+
def test_supported_backends():
294+
assert (
295+
mda.analysis.lineardensity.LinearDensity.get_supported_backends()
296+
== (
297+
"serial",
298+
"multiprocessing",
299+
"dask",
300+
)
301+
)

0 commit comments

Comments
 (0)