Skip to content

Commit 8eeb972

Browse files
committed
Merge branch 'develop' into standardize-atomicdistances-to-use-results
2 parents 77f17c0 + 3be9117 commit 8eeb972

File tree

6 files changed

+129
-8
lines changed

6 files changed

+129
-8
lines changed

benchmarks/benchmarks/ag_methods.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,17 @@ def time_wrap_compound(self, num_atoms):
195195
"""
196196
self.ag.wrap(compound="residues")
197197

198+
def time_asunique_no_sorted(self, num_atoms):
199+
"""Benchmark asunique() operation on
200+
atomgroup without sorting"""
201+
self.ag.asunique(sorted=False)
202+
203+
def time_asunique_sorted(self, num_atoms):
204+
"""Benchmark asunique() operation on
205+
atomgroup with sorting"""
206+
self.ag.asunique(sorted=True)
207+
208+
198209
class AtomGroupAttrsBench(object):
199210
"""Benchmarks for the various MDAnalysis
200211
atomgroup attributes.

package/AUTHORS

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -325,4 +325,4 @@ Logo
325325

326326
The MDAnalysis 'Atom' logo was designed by Christian Beckstein; it is
327327
Copyright (c) 2011 Christian Beckstein and made available under a
328-
Creative Commons Attribution-NoDerivs 3.0 Unported License.
328+
Creative Commons Attribution-NoDerivs 3.0 Unported License.

package/CHANGELOG

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ Fixes
4747
DSSP by porting upstream PyDSSP 0.9.1 fix (Issue #4913)
4848

4949
Enhancements
50+
* Improved performance of inverse index mapping in AtomGroup using an optimized
51+
Cython implementation in lib._cutil.inverse_int_index()
52+
(Issue #3387, PR #5252)
5053
* Added documentation for all keyword in select_atoms() and
5154
selections.rst (Issue #5317, PR #5325)
5255
* Added HydrogenBondAnalysis benchmark for performance tracking (PR #5309)
@@ -3632,4 +3635,4 @@ Testsuite
36323635
licenses
36333636

36343637
11/12/07 naveen
3635-
* prepared for release outside lab
3638+
* prepared for release outside lab

package/MDAnalysis/core/groups.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@
122122
from ..exceptions import NoDataError
123123
from . import topologyobjects
124124
from ._get_readers import get_writer_for, get_converter_for
125+
from ..lib._cutil import inverse_int_index
125126

126127

127128
def _unpickle(u, ix):
@@ -912,10 +913,7 @@ def _asunique(self, group, sorted=False, set_mask=False):
912913

913914
indices = unique_int_1d_unsorted(self.ix)
914915
if set_mask:
915-
mask = np.zeros_like(self.ix)
916-
for i, x in enumerate(indices):
917-
values = np.where(self.ix == x)[0]
918-
mask[values] = i
916+
mask = inverse_int_index(self.ix, indices)
919917
self._unique_restore_mask = mask
920918

921919
issorted = int_array_is_sorted(indices)

package/MDAnalysis/lib/_cutil.pyx

Lines changed: 61 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ from cython.operator cimport dereference as deref
3737

3838
cnp.import_array()
3939

40-
__all__ = ['unique_int_1d', 'make_whole', 'find_fragments',
40+
__all__ = ['unique_int_1d', 'inverse_int_index', 'make_whole', 'find_fragments',
4141
'_sarrus_det_single', '_sarrus_det_multiple']
4242

4343
cdef extern from "calc_distances.h":
@@ -91,6 +91,65 @@ def unique_int_1d(cnp.intp_t[:] values):
9191

9292
return np.array(result)
9393

94+
@cython.boundscheck(False)
95+
@cython.wraparound(False)
96+
def inverse_int_index(cnp.intp_t[:] values,
97+
cnp.intp_t[:] unique_vals):
98+
r"""Construct an inverse index array (mask) mapping values to unique_vals.
99+
100+
The returned mask contains the indices such that:
101+
102+
.. math::
103+
\text{unique\_vals}[\text{mask}] == \text{values}
104+
105+
Parameters
106+
----------
107+
values : numpy.ndarray
108+
1D array of integers (can contain duplicates).
109+
unique_vals : numpy.ndarray
110+
1D array of unique integers corresponding to the elements in `values`.
111+
112+
Returns
113+
-------
114+
numpy.ndarray
115+
An integer array `mask` of the same length as `values`, where
116+
``mask[i]`` is the index of ``values[i]`` in `unique_vals`.
117+
118+
119+
Notes
120+
-----
121+
122+
123+
.. versionadded:: 2.11.0
124+
125+
126+
Examples
127+
--------
128+
>>> import numpy as np
129+
>>> from MDAnalysis.lib._cutil import inverse_int_index
130+
>>> vals = np.array([1, 5, 3, 3, 6], dtype=np.intp)
131+
>>> uniq = np.array([1, 5, 3, 6], dtype=np.intp)
132+
>>> mask = inverse_int_index(vals, uniq)
133+
>>> mask
134+
array([0, 1, 2, 2, 3])
135+
>>> np.all(uniq[mask] == vals)
136+
True
137+
"""
138+
139+
cdef Py_ssize_t n = values.shape[0]
140+
cdef Py_ssize_t m = unique_vals.shape[0]
141+
cdef Py_ssize_t i
142+
143+
cdef dict lookup = {}
144+
cdef cnp.intp_t[:] mask = np.empty(n, dtype=np.intp)
145+
146+
for i in range(m):
147+
lookup[unique_vals[i]] = i
148+
149+
for i in range(n):
150+
mask[i] = lookup[values[i]]
151+
152+
return np.array(mask)
94153

95154
@cython.boundscheck(False)
96155
def _in2d(cnp.intp_t[:, :] arr1, cnp.intp_t[:, :] arr2):
@@ -515,4 +574,4 @@ def find_fragments(atoms, bondlist):
515574
# Add fragment to output
516575
frags.append(np.asarray(this_frag))
517576

518-
return frags
577+
return frags

testsuite/MDAnalysisTests/lib/test_cutil.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
unique_int_1d,
2929
find_fragments,
3030
_in2d,
31+
inverse_int_index,
3132
)
3233

3334

@@ -103,3 +104,52 @@ def test_in2d_VE(arr1, arr2):
103104
ValueError, match=r"Both arrays must be \(n, 2\) arrays"
104105
):
105106
_in2d(arr1, arr2)
107+
108+
109+
def _python_reference_mask(ix, indices):
110+
mask = np.zeros_like(ix)
111+
for i, x in enumerate(indices):
112+
values = np.where(ix == x)[0]
113+
mask[values] = i
114+
return mask
115+
116+
117+
@pytest.mark.parametrize(
118+
"ix,indices",
119+
[
120+
# unsorted and not unique
121+
(
122+
np.array([1, 5, 3, 3, 6], dtype=np.intp),
123+
np.array([1, 5, 3, 6], dtype=np.intp),
124+
),
125+
# sorted and not unique
126+
(
127+
np.array([1, 3, 3, 5, 6], dtype=np.intp),
128+
np.array([1, 3, 5, 6], dtype=np.intp),
129+
),
130+
# unsorted and unique
131+
(
132+
np.array([1, 5, 3, 6], dtype=np.intp),
133+
np.array([1, 5, 3, 6], dtype=np.intp),
134+
),
135+
# sorted and unique
136+
(
137+
np.array([1, 3, 5, 6], dtype=np.intp),
138+
np.array([1, 3, 5, 6], dtype=np.intp),
139+
),
140+
# all elements identical
141+
(
142+
np.array([5, 5, 5], dtype=np.intp),
143+
np.array([5], dtype=np.intp),
144+
),
145+
# single element
146+
(
147+
np.array([7], dtype=np.intp),
148+
np.array([7], dtype=np.intp),
149+
),
150+
],
151+
)
152+
def test_inverse_int_index(ix, indices):
153+
pyref = _python_reference_mask(ix, indices)
154+
cy = inverse_int_index(ix, indices)
155+
assert_equal(pyref, cy)

0 commit comments

Comments
 (0)