diff --git a/package/AUTHORS b/package/AUTHORS index a799200bf5..887210c0ae 100644 --- a/package/AUTHORS +++ b/package/AUTHORS @@ -275,6 +275,7 @@ Chronological list of authors - Harshit Gajjela - Kunj Sinha - Ayush Agarwal + - Parth Uppal External code ------------- diff --git a/package/CHANGELOG b/package/CHANGELOG index 790bbbe80f..7b75455f19 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -42,6 +42,7 @@ Fixes DSSP by porting upstream PyDSSP 0.9.1 fix (Issue #4913) Enhancements + * Reduced code duplication in `_apply()` methods (Issue #5247, PR #5294) * Added new top-level `MDAnalysis.fetch` module (PR #4943) * Added new function `MDAnalysis.fetch.from_PDB` to download structure files from wwPDB using `pooch` as optional dependency (Issue #4907, PR #4943) diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py index 6acd873ee3..a7cd032cc4 100644 --- a/package/MDAnalysis/core/selection.py +++ b/package/MDAnalysis/core/selection.py @@ -253,6 +253,19 @@ def __init__(self, parser, tokens): def apply(self, *args, **kwargs): return self._apply(*args, **kwargs).asunique(sorted=self.parser.sorted) + def _apply_match_by_resnames(self, group, target_resnames): + """Helper function to select atoms based on residue name matches in the topology.""" + resnames = group.universe._topology.resnames + nmidx = resnames.nmidx[group.resindices] + + matches = [ + ix + for (nm, ix) in resnames.namedict.items() + if nm in target_resnames + ] + + return group[np.isin(nmidx, matches)] + class AllSelection(Selection): token = "all" @@ -1193,17 +1206,7 @@ class ProteinSelection(Selection): } def _apply(self, group): - resname_attr = group.universe._topology.resnames - # which values in resname attr are in prot_res? 
- matches = [ - ix - for (nm, ix) in resname_attr.namedict.items() - if nm in self.prot_res - ] - # index of each atom's resname - nmidx = resname_attr.nmidx[group.resindices] - # intersect atom's resname index and matches to prot_res - return group[np.isin(nmidx, matches)] + return self._apply_match_by_resnames(group, self.prot_res) class NucleicSelection(Selection): @@ -1263,15 +1266,7 @@ class NucleicSelection(Selection): } def _apply(self, group): - resnames = group.universe._topology.resnames - nmidx = resnames.nmidx[group.resindices] - - matches = [ - ix for (nm, ix) in resnames.namedict.items() if nm in self.nucl_res - ] - mask = np.isin(nmidx, matches) - - return group[mask] + return self._apply_match_by_resnames(group, self.nucl_res) class WaterSelection(Selection): @@ -1308,17 +1303,7 @@ class WaterSelection(Selection): } def _apply(self, group): - resnames = group.universe._topology.resnames - nmidx = resnames.nmidx[group.resindices] - - matches = [ - ix - for (nm, ix) in resnames.namedict.items() - if nm in self.water_res - ] - mask = np.isin(nmidx, matches) - - return group[mask] + return self._apply_match_by_resnames(group, self.water_res) class BackboneSelection(ProteinSelection): @@ -1350,13 +1335,7 @@ def _apply(self, group): group = group[np.isin(nmidx, name_matches)] # filter by resnames - resname_matches = [ - ix for (nm, ix) in resnames.namedict.items() if nm in self.prot_res - ] - nmidx = resnames.nmidx[group.resindices] - group = group[np.isin(nmidx, resname_matches)] - - return group.unique + return self._apply_match_by_resnames(group, self.prot_res).unique class NucleicBackboneSelection(NucleicSelection): @@ -1388,13 +1367,7 @@ def _apply(self, group): group = group[np.isin(nmidx, name_matches)] # filter by resnames - resname_matches = [ - ix for (nm, ix) in resnames.namedict.items() if nm in self.nucl_res - ] - nmidx = resnames.nmidx[group.resindices] - group = group[np.isin(nmidx, resname_matches)] - - return group.unique + return 
self._apply_match_by_resnames(group, self.nucl_res).unique class BaseSelection(NucleicSelection): @@ -1445,13 +1418,7 @@ def _apply(self, group): group = group[np.isin(nmidx, name_matches)] # filter by resnames - resname_matches = [ - ix for (nm, ix) in resnames.namedict.items() if nm in self.nucl_res - ] - nmidx = resnames.nmidx[group.resindices] - group = group[np.isin(nmidx, resname_matches)] - - return group.unique + return self._apply_match_by_resnames(group, self.nucl_res).unique class NucleicSugarSelection(NucleicSelection): @@ -1480,13 +1447,7 @@ def _apply(self, group): group = group[np.isin(nmidx, name_matches)] # filter by resnames - resname_matches = [ - ix for (nm, ix) in resnames.namedict.items() if nm in self.nucl_res - ] - nmidx = resnames.nmidx[group.resindices] - group = group[np.isin(nmidx, resname_matches)] - - return group.unique + return self._apply_match_by_resnames(group, self.nucl_res).unique class PropertySelection(Selection):