Skip to content

Commit 9531c6e

Browse files
Refactor residue-name matching logic in selection.py (#5294)
Refactor: extract duplicated resname matching logic into Selection._apply_match_by_resnames (#5294). Deduplicate the residue-name matching logic repeated across ProteinSelection, NucleicSelection, WaterSelection, BackboneSelection, NucleicBackboneSelection, BaseSelection, and NucleicSugarSelection into a single helper method on the Selection base class. No behavioral changes. Fixes #5247.
1 parent 6e837a4 commit 9531c6e

File tree

3 files changed

+22
-59
lines changed

3 files changed

+22
-59
lines changed

package/AUTHORS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,7 @@ Chronological list of authors
275275
- Harshit Gajjela
276276
- Kunj Sinha
277277
- Ayush Agarwal
278+
- Parth Uppal
278279

279280
External code
280281
-------------

package/CHANGELOG

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ Fixes
4242
DSSP by porting upstream PyDSSP 0.9.1 fix (Issue #4913)
4343

4444
Enhancements
45+
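* Reduced code duplication in the `_apply()` methods of the residue-name-based selection classes by extracting the shared helper `Selection._apply_match_by_resnames` (Issue #5247, PR #5294)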
4546
* Added new top-level `MDAnalysis.fetch` module (PR #4943)
4647
* Added new function `MDAnalysis.fetch.from_PDB` to download structure files from wwPDB
4748
using `pooch` as optional dependency (Issue #4907, PR #4943)

package/MDAnalysis/core/selection.py

Lines changed: 20 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,19 @@ def __init__(self, parser, tokens):
253253
def apply(self, *args, **kwargs):
254254
return self._apply(*args, **kwargs).asunique(sorted=self.parser.sorted)
255255

256+
def _apply_match_by_resnames(self, group, target_resnames):
257+
"""Helper function to select atoms based on residue name matches in the topology."""
258+
resnames = group.universe._topology.resnames
259+
nmidx = resnames.nmidx[group.resindices]
260+
261+
matches = [
262+
ix
263+
for (nm, ix) in resnames.namedict.items()
264+
if nm in target_resnames
265+
]
266+
267+
return group[np.isin(nmidx, matches)]
268+
256269

257270
class AllSelection(Selection):
258271
token = "all"
@@ -1193,17 +1206,7 @@ class ProteinSelection(Selection):
11931206
}
11941207

11951208
def _apply(self, group):
1196-
resname_attr = group.universe._topology.resnames
1197-
# which values in resname attr are in prot_res?
1198-
matches = [
1199-
ix
1200-
for (nm, ix) in resname_attr.namedict.items()
1201-
if nm in self.prot_res
1202-
]
1203-
# index of each atom's resname
1204-
nmidx = resname_attr.nmidx[group.resindices]
1205-
# intersect atom's resname index and matches to prot_res
1206-
return group[np.isin(nmidx, matches)]
1209+
return self._apply_match_by_resnames(group, self.prot_res)
12071210

12081211

12091212
class NucleicSelection(Selection):
@@ -1263,15 +1266,7 @@ class NucleicSelection(Selection):
12631266
}
12641267

12651268
def _apply(self, group):
1266-
resnames = group.universe._topology.resnames
1267-
nmidx = resnames.nmidx[group.resindices]
1268-
1269-
matches = [
1270-
ix for (nm, ix) in resnames.namedict.items() if nm in self.nucl_res
1271-
]
1272-
mask = np.isin(nmidx, matches)
1273-
1274-
return group[mask]
1269+
return self._apply_match_by_resnames(group, self.nucl_res)
12751270

12761271

12771272
class WaterSelection(Selection):
@@ -1308,17 +1303,7 @@ class WaterSelection(Selection):
13081303
}
13091304

13101305
def _apply(self, group):
1311-
resnames = group.universe._topology.resnames
1312-
nmidx = resnames.nmidx[group.resindices]
1313-
1314-
matches = [
1315-
ix
1316-
for (nm, ix) in resnames.namedict.items()
1317-
if nm in self.water_res
1318-
]
1319-
mask = np.isin(nmidx, matches)
1320-
1321-
return group[mask]
1306+
return self._apply_match_by_resnames(group, self.water_res)
13221307

13231308

13241309
class BackboneSelection(ProteinSelection):
@@ -1350,13 +1335,7 @@ def _apply(self, group):
13501335
group = group[np.isin(nmidx, name_matches)]
13511336

13521337
# filter by resnames
1353-
resname_matches = [
1354-
ix for (nm, ix) in resnames.namedict.items() if nm in self.prot_res
1355-
]
1356-
nmidx = resnames.nmidx[group.resindices]
1357-
group = group[np.isin(nmidx, resname_matches)]
1358-
1359-
return group.unique
1338+
return self._apply_match_by_resnames(group, self.prot_res).unique
13601339

13611340

13621341
class NucleicBackboneSelection(NucleicSelection):
@@ -1388,13 +1367,7 @@ def _apply(self, group):
13881367
group = group[np.isin(nmidx, name_matches)]
13891368

13901369
# filter by resnames
1391-
resname_matches = [
1392-
ix for (nm, ix) in resnames.namedict.items() if nm in self.nucl_res
1393-
]
1394-
nmidx = resnames.nmidx[group.resindices]
1395-
group = group[np.isin(nmidx, resname_matches)]
1396-
1397-
return group.unique
1370+
return self._apply_match_by_resnames(group, self.nucl_res).unique
13981371

13991372

14001373
class BaseSelection(NucleicSelection):
@@ -1445,13 +1418,7 @@ def _apply(self, group):
14451418
group = group[np.isin(nmidx, name_matches)]
14461419

14471420
# filter by resnames
1448-
resname_matches = [
1449-
ix for (nm, ix) in resnames.namedict.items() if nm in self.nucl_res
1450-
]
1451-
nmidx = resnames.nmidx[group.resindices]
1452-
group = group[np.isin(nmidx, resname_matches)]
1453-
1454-
return group.unique
1421+
return self._apply_match_by_resnames(group, self.nucl_res).unique
14551422

14561423

14571424
class NucleicSugarSelection(NucleicSelection):
@@ -1480,13 +1447,7 @@ def _apply(self, group):
14801447
group = group[np.isin(nmidx, name_matches)]
14811448

14821449
# filter by resnames
1483-
resname_matches = [
1484-
ix for (nm, ix) in resnames.namedict.items() if nm in self.nucl_res
1485-
]
1486-
nmidx = resnames.nmidx[group.resindices]
1487-
group = group[np.isin(nmidx, resname_matches)]
1488-
1489-
return group.unique
1450+
return self._apply_match_by_resnames(group, self.nucl_res).unique
14901451

14911452

14921453
class PropertySelection(Selection):

0 commit comments

Comments
 (0)