diff --git a/package/CHANGELOG b/package/CHANGELOG index 3d30351c9c5..35d673727fe 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -15,7 +15,7 @@ The rules for this file: ------------------------------------------------------------------------------- ??/??/?? IAlibay, orbeckst, BHM-Bob, TRY-ER, Abdulrahman-PROG, pbuslaev, - yuxuanzhuang + yuxuanzhuang, yuyuan871111 * 2.10.0 @@ -34,6 +34,14 @@ Enhancements * Improve parsing of topology information from LAMMPS dump files to allow reading of mass, charge and element attributes. (Issue #3449, PR #4995) * Added timestep support for XDR writers and readers (Issue #4905, PR #4908) + * New feature in `Universe`: `set_groups`. This function allows updating + `_topology`, `residues` (ResidueGroup), `segments` (SegmentGroup) in + the `Universe` using user-provided atomwise `resids` and/or `segids`. + (Issue #4948 #2874, PR #4965) + * Adds an argument `force_chainids_to_segids` in `PDBParser`: this will + force the use of ChainID as the segID when reading PDB (it is helpful + if the segment IDs (segids) are partially missing in the PDB file). + (Issue #4948 #2874, PR #4965) Changes diff --git a/package/MDAnalysis/coordinates/PDB.py b/package/MDAnalysis/coordinates/PDB.py index ab57bd8531d..59626339764 100644 --- a/package/MDAnalysis/coordinates/PDB.py +++ b/package/MDAnalysis/coordinates/PDB.py @@ -244,6 +244,11 @@ class PDBReader(base.ReaderBase): :class:`PDBWriter` :class:`PDBReader` + If you would like to force the use of chainID as the segID when parsing PDB, + please use the keyword argument `force_chainids_to_segids=True` + (:class:`MDAnalysis.topology.PDBParser.PDBParser`). + This will prioritize the chain ID to the segment ID. + .. versionchanged:: 0.11.0 * Frames now 0-based instead of 1-based diff --git a/package/MDAnalysis/core/universe.py b/package/MDAnalysis/core/universe.py index 504d07c02c3..6dbc063f100 100644 --- a/package/MDAnalysis/core/universe.py +++ b/package/MDAnalysis/core/universe.py @@ -86,6 +86,7 @@ from .topology import Topology from .topologyattrs import ( AtomAttr, ResidueAttr, SegmentAttr, + Segindices, Segids, Resindices, Resids, Atomindices, BFACTOR_WARNING, _Connection ) from .topologyobjects import TopologyObject @@ -94,6 +95,105 @@ logger = logging.getLogger("MDAnalysis.core.universe") +def _update_topology_by_ids(universe, atomwise_resids, atomwise_segids): + """Update the topology of a Universe with new atomwise resids and segids. + + Parameters + ---------- + universe : Universe + The universe to update. + atomwise_resids : numpy.ndarray + The new atomwise residue indices. + atomwise_segids : numpy.ndarray + The new atomwise segment indices. + """ + from ..topology.base import change_squash + + # the original topology + top = universe._topology + + # detect the atom level attributes (excluding residue level and above) + atom_attrindices = [ + idx + for idx, each_attr in enumerate(top.attrs) + if (Residue not in each_attr.target_classes) and + (not isinstance(each_attr, Atomindices)) + ] + attrs = [top.attrs[each_attr] for each_attr in atom_attrindices] + + # create new residues level stuff + residue_attrindices = [ + idx + for idx, each_attr in enumerate(top.attrs) + if ( + (Residue in each_attr.target_classes) and + (Segment not in each_attr.target_classes) and + (not isinstance(each_attr, Resids)) and + (not isinstance(each_attr, Resindices)) + ) + ] # residue level attributes except resids and resindices + + res_criteria = [atomwise_resids, atomwise_segids] + [ + getattr(universe.atoms, top.attrs[each_attr].attrname) + for each_attr in residue_attrindices + if top.attrs[each_attr].attrname != 'resnums' + ] + + res_to_squash = [atomwise_resids, atomwise_segids] + [ + getattr(universe.atoms, top.attrs[each_attr].attrname) + for each_attr in residue_attrindices + ] + + residx, res_squashed = change_squash(res_criteria, res_to_squash) + resids = res_squashed[0] + res_squashed_segids = res_squashed[1] + n_residues = len(resids) + # all residue-level attributes except resids + res_squashed_res_attrs = res_squashed[2:] + + res_attrs = [Resids(resids)] + [ + # use the correspdoning type of the attribute with new sqaushed values + top.attrs[each_attr].__class__(res_squashed_res_attrs[idx]) + for idx, each_attr in enumerate(residue_attrindices) + ] + attrs.extend(res_attrs) + + # create new segment level stuff + segidx, (segids,) = change_squash((res_squashed_segids,), (res_squashed_segids,)) + n_segments = len(segids) + attrs.append(Segids(segids)) + + # other attributes + other_attrs = [ + each_attr + for each_attr in top.attrs + if ( + (Segment in each_attr.target_classes) and + (not isinstance(each_attr, Segids)) and + (not isinstance(each_attr, Atomindices)) and + (not isinstance(each_attr, Resindices)) and + (not isinstance(each_attr, Segindices)) + ) + ] + + # create new topology + top = Topology( + universe.atoms.n_atoms, + n_residues, + n_segments, + attrs=attrs, + atom_resindex=residx, + residue_segindex=segidx, + ) + + # add back other attributes + if len(other_attrs) > 0: + for each_otherattr in other_attrs: + top.add_TopologyAttr(each_otherattr) + + # update the topology in the universe + universe._topology = top + def _check_file_like(topology): if isstream(topology): @@ -312,6 +412,12 @@ class Universe(object): functionality to treat independent trajectory files as a single virtual trajectory. **kwargs: extra arguments are passed to the topology parser. + For instance, when reading a PDB file + (:class:`PDBReader`, + :class:`PDBParser`), set + ``force_chainids_to_segids=True`` to make the universe use the + chainIDs (column 22) instead of the segmentIDs (column 73-76) as the + `segids` in the universe and select the corresponding SegmentGroup. Attributes ---------- @@ -380,6 +486,10 @@ class Universe(object): guessing masses and atom types after topology is read from a registered parser. + .. versionchanged:: 2.10.0 + Added :meth: `~MDAnalysis.core.universe.Universe.set_groups` + API to set residues/segments based on the atomwise resids/segids. + """ def __init__(self, topology=None, *coordinates, all_coordinates=False, format=None, topology_format=None, transformations=None, @@ -1700,6 +1810,104 @@ def guess_TopologyAttrs( warnings.warn('Can not guess attributes ' 'for universe with 0 atoms') + def set_groups(self, atomwise_resids=None, atomwise_segids=None): + """Set the groups (`ResidueGroup`, `SegmentGroup`) of the Universe + by atomwise resids/segids. + + The `topology` will also be updated based on the provided `atomwise_resids` + and `atomwise_segids`. The original `resids` and `segids` will be stored + in attributes `atomwise_resids_orig` and/or `atomwise_segids_orig` if + they are modified. + See notes for the logic of the function. + + Parameters + ---------- + atomwise_resids: + A list of residue IDs to be set for the Universe. The length + of the list should be equal to the number of atoms in the Universe. + If `None`, the original resids will be used. + + atomwise_segids: + A list of segment IDs to be set for the Universe. The length + of the list should be equal to the number of atoms in the Universe. + If `None`, the original segids will be used. + + Raises + ------ + AssertionError + If the length of the provided atomwise_resids or atomwise_segids + does not match the number of atoms in the Universe. + + Notes + ----- + First, the function will check if resids or segids is provided. + If both resids and segids are not provided (`None`), it will do nothing. + If only one of them is provided, it will use the original values for the + other one. If both are provided, it will use the provided values for + both resids and segids. + The function will then update the topology by a new generated topology + with new values of the resids and segids. + Finally, the corresponding new `ResidueGroup` and `SegmentGroup` will be + created by the updated topology. + + Examples + -------- + To set custom segment IDs for the segments of the Universe:: + + atomwise_segids = ['A', 'A', 'B', 'B'] + u.set_groups(atomwise_segids=atomwise_segids) + + # Now the Universe has two segments with segIDs 'A' and 'B' + u.segments + >>> + + .. versionadded:: 2.10.0 + """ + if (atomwise_resids is None) and (atomwise_segids is None): + warnings.warn("Not setting groups. Please provide atomwise_resids or " + "atomwise_segids.") + return + + # resids + if atomwise_resids is None: + atomwise_resids = self.atoms.resids + + else: + # check the length of atomwise_resids + if len(atomwise_resids) != self.atoms.n_atoms: + raise ValueError( + "The length of atomwise_resids should be the same as " + "the number of atoms in the universe.") + + self.atomwise_resids_orig = self.atoms.resids + logger.info("The new resids replaces the current one. " + "The original resids is stored in " + "atomwise_resids_orig.") + + # segids + if atomwise_segids is None: + atomwise_segids = self.atoms.segids + + else: + # check the length of atomwise_segids + if len(atomwise_segids) != self.atoms.n_atoms: + raise ValueError( + "The length of atomwise_segids should be the same as " + "the number of atoms in the universe.") + + self.atomwise_segids_orig = self.atoms.segids + logger.info("The new resids replaces the current one. " + "The original segids is stored in " + "atomwise_segids_orig.") + + atomwise_resids = np.array(atomwise_resids, dtype=int) + atomwise_segids = np.array(atomwise_segids, dtype=object) + + _update_topology_by_ids(self, + atomwise_resids=atomwise_resids, + atomwise_segids=atomwise_segids) + _generate_from_topology(self) + def Merge(*args): """Create a new new :class:`Universe` from one or more diff --git a/package/MDAnalysis/topology/PDBParser.py b/package/MDAnalysis/topology/PDBParser.py index f9c54a45b08..b168b0a67ae 100644 --- a/package/MDAnalysis/topology/PDBParser.py +++ b/package/MDAnalysis/topology/PDBParser.py @@ -178,6 +178,11 @@ class PDBParser(TopologyReaderBase): - bonds - formalcharges + Note that `PDBParser` accepts an optional keyword argument + ``force_chainids_to_segids``. If set to ``True``, the chain IDs (even if + empty values are in the chain ID column in the file) will forcibly be used + instead of the segment IDs for creating segments. + See Also -------- :class:`MDAnalysis.coordinates.PDB.PDBReader` @@ -207,7 +212,10 @@ class PDBParser(TopologyReaderBase): Removed type and mass guessing (attributes guessing takes place now through universe.guess_TopologyAttrs() API). .. versionchanged:: 2.10.0 - segID is read from 73-76 instead of 67-76. + segID is read from 73-76 instead of 67-76 and added the + `force_chainids_to_segids` keyword argument. Some infos in logger will + be generated if the segids is not present or if the chainids are not + completely equal to segids. """ format = ['PDB', 'ENT'] @@ -218,7 +226,7 @@ def parse(self, **kwargs): ------- MDAnalysis Topology object """ - top = self._parseatoms() + top = self._parseatoms(**kwargs) try: bonds = self._parsebonds(top.ids.values) @@ -235,7 +243,7 @@ def parse(self, **kwargs): return top - def _parseatoms(self): + def _parseatoms(self, **kwargs): """Create the initial Topology object""" resid_prev = 0 # resid looping hack @@ -325,6 +333,12 @@ def _parseatoms(self): "found in the PDB file.") segids = chainids + # If force_chainids_to_segids is set, use chainids as segids + if kwargs.get("force_chainids_to_segids", False): + logger.info("force_chainids_to_segids is set. " + "Using chain IDs as segment IDs.") + segids = chainids + n_atoms = len(serials) attrs = [] @@ -403,11 +417,13 @@ def _parseatoms(self): n_residues = len(resids) attrs.append(Resnums(resnums)) attrs.append(Resids(resids)) - attrs.append(Resnums(resids.copy())) attrs.append(ICodes(icodes)) attrs.append(Resnames(resnames)) - if any(segids) and not any(val is None for val in segids): + if ( + kwargs.get("force_chainids_to_segids", False) or + (any(segids) and not any(val is None for val in segids)) + ): segidx, (segids,) = change_squash((segids,), (segids,)) n_segments = len(segids) attrs.append(Segids(segids)) diff --git a/testsuite/MDAnalysisTests/core/test_universe.py b/testsuite/MDAnalysisTests/core/test_universe.py index 99eaec38a2c..d931966b42d 100644 --- a/testsuite/MDAnalysisTests/core/test_universe.py +++ b/testsuite/MDAnalysisTests/core/test_universe.py @@ -55,6 +55,8 @@ MMTF, CONECT, PDB_conect, + ITP_tip5p, + TPR, ) import MDAnalysis as mda @@ -1538,3 +1540,308 @@ def test_only_top(self): u = mda.Universe(t, to_guess=()) assert len(u.atoms) == 10 + + +class TestUniverseSetGroups: + @pytest.fixture() + def bad_seg(self): + bad_seg_str = """\ +ATOM 659 N THR A 315 22.716 15.055 -1.000 1.00 16.08 A N +ATOM 660 CA THR A 315 22.888 13.803 -0.302 1.00 0.00 C +ATOM 661 C THR A 315 22.006 12.700 -0.882 1.00 0.00 C +ATOM 662 O THR A 316 21.138 12.959 -1.727 1.00 16.25 A O +ATOM 663 CB THR B 314 22.481 13.956 1.182 1.00 0.00 B C +ATOM 664 CG2 THR B 315 23.384 14.924 1.927 1.00 0.00 C +ATOM 665 OG1 THR B 315 21.172 14.548 1.274 1.00 0.00 O +""" + return mda.Universe(StringIO(bad_seg_str), format="PDB") + + @pytest.fixture() + def good(self): + good_str = """\ +ATOM 659 N THR A 315 22.716 15.055 -1.000 1.00 16.08 A N +ATOM 660 CA THR A 315 22.888 13.803 -0.302 1.00 15.13 A C +ATOM 661 C THR A 315 22.006 12.700 -0.882 1.00 15.69 A C +ATOM 662 O THR A 316 21.138 12.959 -1.727 1.00 16.25 A O +ATOM 663 CB THR B 314 22.481 13.956 1.182 1.00 16.22 B C +ATOM 664 CG2 THR B 315 22.874 15.310 1.747 1.00 17.32 B C +ATOM 665 OG1 THR B 315 21.047 13.922 1.304 1.00 15.14 B O +""" + return mda.Universe(StringIO(good_str), format="PDB") + + @pytest.fixture() + def bad_gro(self): + bad_gro_str = """\ +Written by MDAnalysis + 7 + 2THR N 1 2.272 1.506 -0.100 + 2THR CA 2 2.289 1.380 -0.030 + 2THR C 3 2.201 1.270 -0.088 + 3THR O 4 2.114 1.296 -0.173 + 1THR CB 5 2.248 1.396 0.118 + 2THR CG2 6 2.287 1.531 0.175 + 2THR OG1 7 2.105 1.392 0.130 + 3.00000 3.00000 3.00000 +""" + return mda.Universe(StringIO(bad_gro_str), format="GRO") + + @pytest.fixture() + def itp(self): + return mda.Universe(ITP_tip5p, HW2_CHARGE=2, infer_system=True) + + @pytest.fixture() + def tpr(self): + return mda.Universe(TPR) + + def test_do_nothing(self, good): + # check do nothing (check the warning message) + with pytest.warns( + UserWarning, + match="Not setting groups. Please provide atomwise_resids or " + "atomwise_segids", + ): + good.set_groups() + + # check segments + assert_equal(["A", "B"], good.segments.segids) + # check residues + assert_equal([315, 316, 314, 315], good.residues.resids) + + def test_set_segments(self, bad_seg, good): + # check bad_seg with good + # [BEFORE] 5 segments and 5 residues in bad_seg + assert len(bad_seg.segments) == 5 # 5 segments + assert_equal(["A", "", "A", "B", ""], bad_seg.segments.segids) + assert len(bad_seg.residues) == 5 # 5 residues + assert_equal([315, 315, 316, 314, 315], bad_seg.residues.resids) + assert_equal( + ["THR", "THR", "THR", "THR", "THR"], bad_seg.residues.resnames + ) + assert_equal(["A", "", "A", "B", ""], bad_seg.residues.segids) + assert len(bad_seg.atoms) == 7 # 7 atoms + assert_equal([315, 315, 315, 316, 314, 315, 315], bad_seg.atoms.resids) + assert_equal(["THR"] * 7, bad_seg.atoms.resnames) + assert_equal(["A", "", "", "A", "B", "", ""], bad_seg.atoms.segids) + assert_equal( + ["N", "CA", "C", "O", "CB", "CG2", "OG1"], bad_seg.atoms.names + ) + assert_equal( + ["A", "A", "A", "A", "B", "B", "B"], bad_seg.atoms.chainIDs + ) + original_attrs = bad_seg._topology.attrs + + # [ACT] set new segids + bad_seg.set_groups(atomwise_segids=["A", "A", "A", "A", "B", "B", "B"]) + + # [AFTER] 2 segments and 4 residues in bad_seg + # segment level + assert len(bad_seg.segments) == len(good.segments) + assert len(bad_seg.segments) == 2 # 2 segments + assert_equal(bad_seg.segments.segids, good.segments.segids) + + # residue level + assert len(bad_seg.residues) == len(good.residues) + assert len(bad_seg.residues) == 4 # 4 residues + assert_equal(bad_seg.residues.resids, good.residues.resids) + assert_equal(bad_seg.residues.resnames, good.residues.resnames) + assert_equal(bad_seg.residues.segids, good.residues.segids) + + # atom level + assert len(bad_seg.atoms) == len(good.atoms) + assert len(bad_seg.atoms) == 7 # 7 atoms + assert_equal(bad_seg.atoms.resids, good.atoms.resids) + assert_equal(bad_seg.atoms.resnames, good.atoms.resnames) + assert_equal(bad_seg.atoms.segids, good.atoms.segids) + assert_equal(bad_seg.atoms.names, good.atoms.names) + assert_equal(bad_seg.atoms.chainIDs, good.atoms.chainIDs) + + # check attrs + assert len(bad_seg._topology.attrs) == len(original_attrs) + assert_equal( + sorted([each.attrname for each in bad_seg._topology.attrs]), + sorted([each.attrname for each in original_attrs]), + ) + + def test_set_residues(self, bad_gro, good): + # check bad_gro with good + # [BEFORE] resids in bad_gro + assert_equal([2, 3, 1, 2], bad_gro.residues.resids) + assert_equal([2, 2, 2, 3, 1, 2, 2], bad_gro.atoms.resids) + original_attrs = bad_gro._topology.attrs + + # [ACT] + bad_gro.set_groups( + atomwise_resids=[315, 315, 315, 316, 314, 315, 315], + ) + + # [AFTER] resids in bad_gro + assert len(bad_gro.residues) == len(good.residues) + assert_equal(bad_gro.residues.resids, good.residues.resids) + assert len(bad_gro.atoms) == len(good.atoms) + assert_equal(bad_gro.atoms.resids, good.atoms.resids) + assert len(bad_gro._topology.attrs) == len(original_attrs) + assert_equal( + sorted([each.attrname for each in bad_gro._topology.attrs]), + sorted([each.attrname for each in original_attrs]), + ) + + def test_set_groups_both(self, bad_gro, good): + # test: both atomwise_segids and atomwise_resids exist + # check bad_gro with good + # [BEFORE] 1 segment in bad_gro + assert len(bad_gro.segments) == 1 + assert_equal(["SYSTEM"], bad_gro.segments.segids) + assert len(bad_gro.residues) == 4 + assert_equal(["SYSTEM"] * 4, bad_gro.residues.segids) + assert_equal([2, 3, 1, 2], bad_gro.residues.resids) + assert len(bad_gro.atoms) == 7 + assert_equal(["SYSTEM"] * 7, bad_gro.atoms.segids) + assert_equal([2, 2, 2, 3, 1, 2, 2], bad_gro.atoms.resids) + original_attrs = bad_gro._topology.attrs + + # [ACT] set new segids and resids + bad_gro.set_groups( + atomwise_segids=good.atoms.segids, + atomwise_resids=good.atoms.resids, + ) + + # [AFTER] 1 segments in bad_gro + assert len(bad_gro.segments) == len(good.segments) + assert len(bad_gro.segments) == 2 + assert_equal(bad_gro.segments.segids, good.segments.segids) + assert len(bad_gro.residues) == len(good.residues) + assert len(bad_gro.residues) == 4 + assert_equal(bad_gro.residues.segids, good.residues.segids) + assert_equal(bad_gro.residues.resids, good.residues.resids) + assert len(bad_gro.atoms) == 7 + assert_equal(bad_gro.atoms.segids, good.atoms.segids) + assert_equal(bad_gro.atoms.resids, good.atoms.resids) + assert len(bad_gro._topology.attrs) == len(original_attrs) + assert_equal( + sorted([each.attrname for each in bad_gro._topology.attrs]), + sorted([each.attrname for each in original_attrs]), + ) + + def test_value_error(self, good): + # [BEFORE] atomwise_segids + assert len(good.atoms) == 7 + + # [ACT] set new segids and resids + with pytest.raises(ValueError): + good.set_groups( + atomwise_segids=["A", "A"], + ) + + with pytest.raises(ValueError): + good.set_groups( + atomwise_resids=[30, 31], + ) + + def test_with_ITP(self, itp): + # [BEFORE] 1 segment in itp + assert len(itp.segments) == 1 + assert_equal(["SOL"], itp.segments.segids) + assert len(itp.residues) == 1 + assert_equal([1], itp.residues.resids) + assert_equal([1], itp.residues.chargegroups) + assert len(itp.atoms) == 5 + assert_equal([1] * 5, itp.atoms.resids) + assert_equal([1] * 5, itp.atoms.chargegroups) + original_attrs = itp._topology.attrs + + # [ACT] set new segids and resids + itp.set_groups( + atomwise_segids=["A", "A", "B", "B", "B"], + atomwise_resids=[3, 3, 3, 3, 4], + ) + + # [AFTER] 2 segments in itp + assert len(itp.segments) == 2 + assert_equal(["A", "B"], itp.segments.segids) + assert len(itp.residues) == 3 + assert_equal(["A", "B", "B"], itp.residues.segids) + assert_equal([3, 3, 4], itp.residues.resids) + assert_equal([1, 1, 1], itp.residues.chargegroups) + assert len(itp.atoms) == 5 + assert_equal([3, 3, 3, 3, 4], itp.atoms.resids) + assert_equal([1] * 5, itp.atoms.chargegroups) + assert len(itp._topology.attrs) == len(original_attrs) + assert_equal( + sorted([each.attrname for each in itp._topology.attrs]), + sorted([each.attrname for each in original_attrs]), + ) + + def test_with_TPR(self, tpr): + # [BEFORE] + assert len(tpr.segments) == 3 + assert_equal( + ["seg_0_AKeco", "seg_1_SOL", "seg_2_NA+"], tpr.segments.segids + ) + assert len(tpr.residues) == 11302 + assert_equal(range(1, 11303, 1), tpr.residues.resids) + orignial_res_molnums = tpr.residues.molnums + assert len(tpr.atoms) == 47681 + orignial_resids = tpr.atoms.resids + orignial_atm_molnums = tpr.atoms.molnums + original_attrs = tpr._topology.attrs + + # [ACT] set new segids and resids + sub_dict = {"seg_0_AKeco": "A", "seg_1_SOL": "B", "seg_2_NA+": "C"} + new_segids = [sub_dict[each] for each in tpr.atoms.segids] + tpr.set_groups( + atomwise_segids=new_segids, + atomwise_resids=tpr.atoms.resids + 120, + ) + + # [AFTER] + assert len(tpr.segments) == 3 + assert_equal(["A", "B", "C"], tpr.segments.segids) + assert len(tpr.residues) == 11302 + assert_equal(range(1 + 120, 11303 + 120, 1), tpr.residues.resids) + assert_equal(orignial_res_molnums, tpr.residues.molnums) + assert len(tpr.atoms) == 47681 + assert_equal(orignial_resids + 120, tpr.atoms.resids) + assert_equal(orignial_atm_molnums, tpr.atoms.molnums) + assert len(tpr._topology.attrs) == len(original_attrs) + assert_equal( + sorted([each.attrname for each in tpr._topology.attrs]), + sorted([each.attrname for each in original_attrs]), + ) + + def test_no_affect_custom_attrs(self, good): + # [BEFORE-1] check custom attributes + with pytest.raises(NoDataError): + good.atoms.charges + assert len(good._topology.attrs) == 18 + + # [ACT-1] assign custom attributes + good.add_TopologyAttr("charges", [0, 1, -1, 0, -1, 0, 1]) + + # [AFTER-1] check custom attributes + assert_equal([0, 1, -1, 0, -1, 0, 1], good.atoms.charges) + assert len(good._topology.attrs) == 19 + + # [BEFORE-2] check segids and resids + assert len(good.segments) == 2 + assert_equal(["A", "B"], good.segments.segids) + assert len(good.residues) == 4 + assert_equal([315, 316, 314, 315], good.residues.resids) + original_attrs = good._topology.attrs + + # [ACT-2] set new segids and resids + good.set_groups( + atomwise_segids=["C", "C", "C", "C", "C", "C", "C"], + atomwise_resids=[1, 1, 1, 2, 2, 2, 3], + ) + + # [AFTER-2] check segids and resids + assert len(good.segments) == 1 + assert_equal(["C"], good.segments.segids) + assert len(good.residues) == 3 + assert_equal([1, 2, 3], good.residues.resids) + assert_equal([0, 1, -1, 0, -1, 0, 1], good.atoms.charges) + assert len(good._topology.attrs) == 19 + assert_equal( + sorted([each.attrname for each in original_attrs]), + sorted([each.attrname for each in good._topology.attrs]), + ) diff --git a/testsuite/MDAnalysisTests/topology/test_pdb.py b/testsuite/MDAnalysisTests/topology/test_pdb.py index 146462969af..736e0f25f3b 100644 --- a/testsuite/MDAnalysisTests/topology/test_pdb.py +++ b/testsuite/MDAnalysisTests/topology/test_pdb.py @@ -438,3 +438,67 @@ def test_PDB_bad_charges(infile, entry): with pytest.warns(UserWarning, match=wmsg): u = mda.Universe(StringIO(infile), format="PDB") assert not hasattr(u, "formalcharges") + + +def test_force_chainids_to_segids(): + # get examples from issue 2874 with minor modifications + + res1_str = """\ +ATOM 659 N THR B 315 22.716 15.055 -1.000 1.00 16.08 A N +ATOM 660 CA THR B 315 22.888 13.803 -0.302 1.00 0.00 C +ATOM 661 C THR B 315 22.006 12.700 -0.882 1.00 0.00 C +ATOM 662 O THR B 315 21.138 12.959 -1.727 1.00 16.25 A O +ATOM 663 CB THR B 315 22.481 13.956 1.182 1.00 0.00 C +ATOM 664 CG2 THR B 315 23.384 14.924 1.927 1.00 0.00 C +ATOM 665 OG1 THR B 315 21.172 14.548 1.274 1.00 0.00 O +""" + + res2_str = """\ +ATOM 659 N THR B 315 22.716 15.055 -1.000 1.00 16.08 A N +ATOM 660 CA THR B 315 22.888 13.803 -0.302 1.00 15.13 A C +ATOM 661 C THR B 315 22.006 12.700 -0.882 1.00 15.69 A C +ATOM 662 O THR B 315 21.138 12.959 -1.727 1.00 16.25 A O +ATOM 663 CB THR B 315 22.481 13.956 1.182 1.00 16.22 A C +ATOM 664 CG2 THR B 315 22.874 15.310 1.747 1.00 17.32 A C +ATOM 665 OG1 THR B 315 21.047 13.922 1.304 1.00 15.14 A O +""" + res3_str = """\ +ATOM 659 N THR 315 22.716 15.055 -1.000 1.00 16.08 A N +ATOM 660 CA THR 315 22.888 13.803 -0.302 1.00 15.13 A C +ATOM 661 C THR 315 22.006 12.700 -0.882 1.00 15.69 A C +ATOM 662 O THR 315 21.138 12.959 -1.727 1.00 16.25 A O +ATOM 663 CB THR 315 22.481 13.956 1.182 1.00 16.22 A C +ATOM 664 CG2 THR 315 22.874 15.310 1.747 1.00 17.32 A C +ATOM 665 OG1 THR 315 21.047 13.922 1.304 1.00 15.14 A O +""" + + res1_not_force = mda.Universe( + StringIO(res1_str), format="PDB", force_chainids_to_segids=False + ) + res1_force = mda.Universe( + StringIO(res1_str), format="PDB", force_chainids_to_segids=True + ) + res2_not_force = mda.Universe( + StringIO(res2_str), format="PDB", force_chainids_to_segids=False + ) + res2_force = mda.Universe( + StringIO(res2_str), format="PDB", force_chainids_to_segids=True + ) + res3_force = mda.Universe( + StringIO(res3_str), format="PDB", force_chainids_to_segids=True + ) + + assert len(res1_not_force.segments) == 4 + assert_equal(["A", "", "A", ""], res1_not_force.segments.segids) + assert len(res1_not_force.residues) == 4 + assert_equal([315, 315, 315, 315], res1_not_force.residues.resids) + assert len(res1_force.segments) == 1 + assert_equal(["B"], res1_force.segments.segids) + assert len(res1_force.residues) == 1 + assert_equal([315], res1_force.residues.resids) + assert len(res2_not_force.segments) == 1 + assert_equal(["A"], res2_not_force.segments.segids) + assert len(res2_force.segments) == 1 + assert_equal(["B"], res2_force.segments.segids) + assert len(res3_force.segments) == 1 + assert_equal([""], res3_force.segments.segids)