Refactor residue-name matching logic in selection.py (#5294)

ParthUppal523 · web-flow · commit 9531c6e157d0 · 2026-03-09T10:22:37.000-07:00
Refactor: extract duplicated resname matching logic into Selection._apply_match_by_resnames (#5294) Deduplicate residue-name matching logic repeated across ProteinSelection, NucleicSelection, WaterSelection, BackboneSelection, NucleicBackboneSelection, BaseSelection, and NucleicSugarSelection into a single helper method on the Selection base class. No behavioral changes. Fixes #5247
diff --git a/package/AUTHORS b/package/AUTHORS
@@ -275,6 +275,7 @@ Chronological list of authors
   - Harshit Gajjela
   - Kunj Sinha
   - Ayush Agarwal
+  - Parth Uppal
 
 External code
 -------------
diff --git a/package/CHANGELOG b/package/CHANGELOG
@@ -42,6 +42,7 @@ Fixes
    DSSP by porting upstream PyDSSP 0.9.1 fix (Issue #4913)
 
 Enhancements
+ * Deduplicated residue-name matching in selection `_apply()` methods (Issue #5247, PR #5294)
  * Added new top-level `MDAnalysis.fetch` module (PR #4943)
  * Added new function `MDAnalysis.fetch.from_PDB` to download structure files from wwPDB
    using `pooch` as optional dependency (Issue #4907, PR #4943) 
diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py
@@ -253,6 +253,19 @@ def __init__(self, parser, tokens):
     def apply(self, *args, **kwargs):
         return self._apply(*args, **kwargs).asunique(sorted=self.parser.sorted)
 
+    def _apply_match_by_resnames(self, group, target_resnames):
+        """Return atoms in `group` whose resname is in `target_resnames`."""
+        resnames = group.universe._topology.resnames
+        nmidx = resnames.nmidx[group.resindices]
+
+        matches = [
+            ix
+            for (nm, ix) in resnames.namedict.items()
+            if nm in target_resnames
+        ]
+
+        return group[np.isin(nmidx, matches)]
+
 
 class AllSelection(Selection):
     token = "all"
@@ -1193,17 +1206,7 @@ class ProteinSelection(Selection):
     }
 
     def _apply(self, group):
-        resname_attr = group.universe._topology.resnames
-        # which values in resname attr are in prot_res?
-        matches = [
-            ix
-            for (nm, ix) in resname_attr.namedict.items()
-            if nm in self.prot_res
-        ]
-        # index of each atom's resname
-        nmidx = resname_attr.nmidx[group.resindices]
-        # intersect atom's resname index and matches to prot_res
-        return group[np.isin(nmidx, matches)]
+        return self._apply_match_by_resnames(group, self.prot_res)
 
 
 class NucleicSelection(Selection):
@@ -1263,15 +1266,7 @@ class NucleicSelection(Selection):
     }
 
     def _apply(self, group):
-        resnames = group.universe._topology.resnames
-        nmidx = resnames.nmidx[group.resindices]
-
-        matches = [
-            ix for (nm, ix) in resnames.namedict.items() if nm in self.nucl_res
-        ]
-        mask = np.isin(nmidx, matches)
-
-        return group[mask]
+        return self._apply_match_by_resnames(group, self.nucl_res)
 
 
 class WaterSelection(Selection):
@@ -1308,17 +1303,7 @@ class WaterSelection(Selection):
     }
 
     def _apply(self, group):
-        resnames = group.universe._topology.resnames
-        nmidx = resnames.nmidx[group.resindices]
-
-        matches = [
-            ix
-            for (nm, ix) in resnames.namedict.items()
-            if nm in self.water_res
-        ]
-        mask = np.isin(nmidx, matches)
-
-        return group[mask]
+        return self._apply_match_by_resnames(group, self.water_res)
 
 
 class BackboneSelection(ProteinSelection):
@@ -1350,13 +1335,7 @@ def _apply(self, group):
         group = group[np.isin(nmidx, name_matches)]
 
         # filter by resnames
-        resname_matches = [
-            ix for (nm, ix) in resnames.namedict.items() if nm in self.prot_res
-        ]
-        nmidx = resnames.nmidx[group.resindices]
-        group = group[np.isin(nmidx, resname_matches)]
-
-        return group.unique
+        return self._apply_match_by_resnames(group, self.prot_res).unique
 
 
 class NucleicBackboneSelection(NucleicSelection):
@@ -1388,13 +1367,7 @@ def _apply(self, group):
         group = group[np.isin(nmidx, name_matches)]
 
         # filter by resnames
-        resname_matches = [
-            ix for (nm, ix) in resnames.namedict.items() if nm in self.nucl_res
-        ]
-        nmidx = resnames.nmidx[group.resindices]
-        group = group[np.isin(nmidx, resname_matches)]
-
-        return group.unique
+        return self._apply_match_by_resnames(group, self.nucl_res).unique
 
 
 class BaseSelection(NucleicSelection):
@@ -1445,13 +1418,7 @@ def _apply(self, group):
         group = group[np.isin(nmidx, name_matches)]
 
         # filter by resnames
-        resname_matches = [
-            ix for (nm, ix) in resnames.namedict.items() if nm in self.nucl_res
-        ]
-        nmidx = resnames.nmidx[group.resindices]
-        group = group[np.isin(nmidx, resname_matches)]
-
-        return group.unique
+        return self._apply_match_by_resnames(group, self.nucl_res).unique
 
 
 class NucleicSugarSelection(NucleicSelection):
@@ -1480,13 +1447,7 @@ def _apply(self, group):
         group = group[np.isin(nmidx, name_matches)]
 
         # filter by resnames
-        resname_matches = [
-            ix for (nm, ix) in resnames.namedict.items() if nm in self.nucl_res
-        ]
-        nmidx = resnames.nmidx[group.resindices]
-        group = group[np.isin(nmidx, resname_matches)]
-
-        return group.unique
+        return self._apply_match_by_resnames(group, self.nucl_res).unique
 
 
 class PropertySelection(Selection):