|
| 1 | +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding: utf-8 -*- |
| 2 | +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 |
| 3 | +# |
| 4 | +# MDAnalysis --- https://www.mdanalysis.org |
| 5 | +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors |
| 6 | +# (see the file AUTHORS for the full list of names) |
| 7 | +# |
| 8 | +# Released under the Lesser GNU Public Licence, v2.1 or any higher version |
| 9 | +# |
| 10 | +# Please cite your use of MDAnalysis in published work: |
| 11 | +# |
| 12 | +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, |
| 13 | +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. |
| 14 | +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics |
| 15 | +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th |
| 16 | +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. |
| 17 | +# doi: 10.25080/majora-629e541a-00e |
| 18 | +# |
| 19 | +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. |
| 20 | +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. |
| 21 | +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 |
| 22 | +# |
| 23 | + |
| 24 | +""" |
| 25 | +PDB Fetchers --- :mod:`MDAnalysis.fetch.pdb` |
| 26 | +============================================ |
| 27 | +
|
| 28 | +This suite of functions downloads structure files from the Research Collaboratory for |
| 29 | +Structural Bioinformatics (RCSB) `Protein Data Bank`_ (PDB). |
| 30 | +
|
| 31 | +.. _Protein Data Bank: https://www.rcsb.org/ |
| 32 | +
|
| 33 | +Variables |
| 34 | +--------- |
| 35 | +
|
| 36 | +.. autodata:: DEFAULT_CACHE_NAME_DOWNLOADER |
| 37 | +
|
| 38 | +
|
| 39 | +Functions |
| 40 | +--------- |
| 41 | +
|
| 42 | +.. autofunction:: from_PDB |
| 43 | +
|
| 44 | +""" |
| 45 | +from pathlib import Path |
| 46 | + |
# pooch is treated as an optional dependency: a failed import is recorded in
# HAS_POOCH rather than propagated, so this module can still be imported
# without it. from_PDB() checks the flag and raises ModuleNotFoundError at
# call time when pooch is missing.
try:
    import pooch
except ImportError:
    HAS_POOCH = False
else:
    HAS_POOCH = True
| 53 | + |
#: Name of the :mod:`pooch` cache directory ``pooch.os_cache(DEFAULT_CACHE_NAME_DOWNLOADER)``;
#: see :func:`pooch.os_cache` for further details.
#:
#: .. versionadded:: 2.11.0
DEFAULT_CACHE_NAME_DOWNLOADER = "MDAnalysis_pdbs"

# File extensions accepted by from_PDB() for its ``file_format`` argument; any
# other value is rejected there with a ValueError. The list mirrors the entry
# file formats documented by the RCSB file download services:
# https://www.rcsb.org/docs/programmatic-access/file-download-services#pdb-entry-files
SUPPORTED_FILE_FORMATS_DOWNLOADER = (
    "cif",
    "cif.gz",
    "bcif",
    "bcif.gz",
    "xml",
    "xml.gz",
    "pdb",
    "pdb.gz",
    "pdb1",
    "pdb1.gz",
)
| 73 | + |
| 74 | + |
def from_PDB(
    pdb_ids,
    cache_path=None,
    progressbar=False,
    file_format="cif.gz",
):
    """
    Download one or more PDB files from the RCSB Protein Data Bank and cache
    them locally.

    Given one or multiple PDB IDs, downloads the corresponding structure files
    in the requested format and stores them in a local cache directory. If
    files are cached on disk, *from_PDB* will skip the download and use the
    cached version instead.

    Returns the path(s) as a :class:`~pathlib.Path` to the downloaded file(s).

    Parameters
    ----------
    pdb_ids : str or sequence of str
        A single PDB ID as a string, or a sequence of PDB IDs to fetch.
    cache_path : str or pathlib.Path
        Directory where downloaded file(s) will be cached.
        The default ``None`` argument uses the :mod:`pooch` default cache with
        project name :data:`DEFAULT_CACHE_NAME_DOWNLOADER`.
    progressbar : bool
        If True, display a progress bar during file downloads. Default is False.
    file_format : str
        The file extension/format to download (e.g., "cif", "pdb").
        Default is ``"cif.gz"``.
        See the Notes section below for a list of all supported file formats.

    Returns
    -------
    :class:`~pathlib.Path` or list of :class:`~pathlib.Path`
        The path(s) to the downloaded file(s). Returns a single
        :class:`~pathlib.Path` if a single pdb id is given as a string, or a
        list of :class:`~pathlib.Path` if a sequence of pdb ids is provided.
        The list has one entry per input id, in input order; an id that
        appears more than once yields the same path more than once.

    Raises
    ------
    ModuleNotFoundError
        If the optional dependency :mod:`pooch` is not installed.

    ValueError
        For an invalid file format. Supported file formats are under Notes.

    :class:`requests.exceptions.HTTPError`
        If an invalid PDB code is specified.

    Notes
    -----
    This function uses the `RCSB File Download Services`_ for directly downloading
    structure files via https.

    .. _`RCSB File Download Services`:
       https://www.rcsb.org/docs/programmatic-access/file-download-services

    The RCSB currently provides data in ``'cif'`` , ``'cif.gz'`` , ``'bcif'`` ,
    ``'bcif.gz'`` , ``'xml'`` , ``'xml.gz'`` , ``'pdb'`` , ``'pdb.gz'``,
    ``'pdb1'``, ``'pdb1.gz'`` file formats and can therefore be downloaded.
    Not all of these formats can be currently read with MDAnalysis.

    Caching, controlled by the `cache_path` parameter, is handled internally by
    :mod:`pooch`. The default cache name is taken from
    :data:`DEFAULT_CACHE_NAME_DOWNLOADER`. To clear cache (and subsequently force
    re-fetching), it is required to delete the cache folder as specified by
    `cache_path`.

    Examples
    --------
    Download a single PDB file (the directory part of the returned path
    depends on the cache location and is abbreviated here):

    >>> mda.fetch.from_PDB("1AKE", file_format="cif")
    PosixPath('.../MDAnalysis_pdbs/1AKE.cif')

    Download multiple PDB files with a progress bar (default format ``cif.gz``):

    >>> mda.fetch.from_PDB(["1AKE", "4BWZ"], progressbar=True)
    [PosixPath('.../MDAnalysis_pdbs/1AKE.cif.gz'), PosixPath('.../MDAnalysis_pdbs/4BWZ.cif.gz')]

    Download a single PDB file and convert it to a universe:

    >>> mda.Universe(mda.fetch.from_PDB("1AKE", file_format="pdb.gz"))
    <Universe with 3816 atoms>

    Download multiple PDB files and convert each of them into a universe:

    >>> [mda.Universe(pdb) for pdb in mda.fetch.from_PDB(["1AKE", "4BWZ"], file_format="pdb.gz")]
    [<Universe with 3816 atoms>, <Universe with 2824 atoms>]


    .. versionadded:: 2.11.0
    """

    if not HAS_POOCH:
        raise ModuleNotFoundError(
            "pooch is needed as a dependency for from_PDB()"
        )
    elif file_format not in SUPPORTED_FILE_FORMATS_DOWNLOADER:
        raise ValueError(
            "Invalid file format. Supported file formats "
            f"are {SUPPORTED_FILE_FORMATS_DOWNLOADER}"
        )

    # A lone string is one PDB ID, not a sequence of characters. Anything else
    # is materialized into a list because it is traversed twice below (once to
    # build the registry, once to fetch), which a one-shot iterator would not
    # survive.
    single_id = isinstance(pdb_ids, str)
    _pdb_ids = [pdb_ids] if single_id else list(pdb_ids)

    if cache_path is None:
        cache_path = pooch.os_cache(DEFAULT_CACHE_NAME_DOWNLOADER)

    file_names = [f"{pdb_id}.{file_format}" for pdb_id in _pdb_ids]

    # Have to do this dictionary approach instead of using pooch.retrieve in order
    # to prevent the hardcoded known_hash warning from showing up.
    # A hash of ``None`` disables checksum verification for that file.
    registry_dictionary = dict.fromkeys(file_names)

    downloader = pooch.create(
        path=cache_path,
        base_url="https://files.wwpdb.org/download/",
        registry=registry_dictionary,
    )

    # Iterate over the requested file names rather than the registry keys: the
    # registry collapses duplicate IDs, which would make the result shorter
    # than the input and break the id <-> path correspondence.
    paths = [
        Path(downloader.fetch(fname=file_name, progressbar=progressbar))
        for file_name in file_names
    ]

    return paths[0] if single_id else paths
0 commit comments