Skip to content

Commit 7938070

Browse files
authored
Merge pull request #723 from macs3-project/add_api_documentation
Added documentation and example code block where applicable.
2 parents a4a9b83 + d814cd5 commit 7938070

16 files changed

Lines changed: 1028 additions & 121 deletions

File tree

MACS3/IO/BAM.py

Lines changed: 182 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,19 @@ def __str__(self):
144144

145145
@cython.cclass
146146
class BAIFile:
147-
"""In-memory representation of a BAM index (BAI) file."""
147+
"""In-memory representation of a BAM index (BAI) file.
148+
149+
Attributes:
150+
filename: Path to the ``.bai`` file.
151+
magic: File magic header (should be ``b"BAI\\1"``).
152+
n_ref: Number of reference sequences.
153+
metadata: Per-reference pseudo-bin metadata, indexed by reference number.
154+
n_bins: Total number of bins across references.
155+
n_chunks: Total number of chunks across references.
156+
n_mapped: Total mapped reads.
157+
n_unmapped: Total unmapped reads.
158+
bins: Per-reference mappings, indexed by reference number, from bin ID to that bin's list of chunks.
159+
"""
148160
filename: str # filename
149161
fhd: object # file handler
150162
magic: bytes # magic code for this file
@@ -266,12 +278,47 @@ def __load_bins(self):
266278

267279
@cython.ccall
268280
def get_chunks_by_bin(self, ref_n: cython.uint, bin_n: cython.uint) -> list:
269-
"""Return sorted BGZF chunks for ``bin_n`` on reference ``ref_n``."""
281+
"""Return sorted BGZF chunks for ``bin_n`` on reference ``ref_n``.
282+
283+
Args:
284+
ref_n: Reference index in the BAI.
285+
bin_n: Bin identifier.
286+
287+
Returns:
288+
list: Sorted list of BGZF chunks for the bin.
289+
290+
Examples:
291+
.. code-block:: python
292+
293+
from MACS3.IO.BAM import BAIFile
294+
bai = BAIFile("example.bam.bai")
295+
bai.open()
296+
bai.read()
297+
chunks = bai.get_chunks_by_bin(ref_n=0, bin_n=4681)
298+
"""
270299
return sorted(self.bins[ref_n].get(bin_n, []))
271300

272301
@cython.ccall
273302
def get_chunks_by_list_of_bins(self, ref_n: cython.uint, bins: list) -> list:
274-
"""Return sorted chunks for the unique set of bins provided."""
303+
"""Return sorted chunks for the unique set of bins provided.
304+
305+
Args:
306+
ref_n: Reference index in the BAI.
307+
bins: list of bin identifiers.
308+
309+
Returns:
310+
list: sorted list of BGZF chunks for the bins.
311+
312+
Examples:
313+
.. code-block:: python
314+
315+
from MACS3.IO.BAM import BAIFile
316+
bai = BAIFile("example.bam.bai")
317+
bai.open()
318+
bai.read()
319+
bins = [4681, 4682, 585]
320+
chunks = bai.get_chunks_by_list_of_bins(ref_n=0, bins=bins)
321+
"""
275322
bin_n: cython.uint
276323
chunks: list = []
277324
bin_set: set
@@ -283,12 +330,46 @@ def get_chunks_by_list_of_bins(self, ref_n: cython.uint, bins: list) -> list:
283330

284331
@cython.ccall
285332
def get_metadata_by_refseq(self, ref_n: cython.uint) -> dict:
286-
"""Return pseudo-bin metadata for reference ``ref_n``."""
333+
"""Return pseudo-bin metadata for reference ``ref_n``.
334+
335+
Args:
336+
ref_n: Reference index in the BAI.
337+
338+
Returns:
339+
dict: Metadata for the reference.
340+
341+
Examples:
342+
.. code-block:: python
343+
344+
from MACS3.IO.BAM import BAIFile
345+
bai = BAIFile("example.bam.bai")
346+
bai.open()
347+
bai.read()
348+
meta = bai.get_metadata_by_refseq(ref_n=0)
349+
"""
287350
return self.metadata[ref_n]
288351

289352
@cython.ccall
290353
def get_chunks_by_region(self, ref_n: cython.uint, beg: cython.uint, end: cython.uint) -> list:
291-
"""Return BGZF chunks overlapping ``[beg, end)`` on reference ``ref_n``."""
354+
"""Return BGZF chunks overlapping ``[beg, end)`` on reference ``ref_n``.
355+
356+
Args:
357+
ref_n: Reference index in the BAI.
358+
beg: start coordinate.
359+
end: end coordinate.
360+
361+
Returns:
362+
list: Sorted list of BGZF chunks covering the region.
363+
364+
Examples:
365+
.. code-block:: python
366+
367+
from MACS3.IO.BAM import BAIFile
368+
bai = BAIFile("example.bam.bai")
369+
bai.open()
370+
bai.read()
371+
chunks = bai.get_chunks_by_region(ref_n=0, beg=1_000_000, end=1_010_000)
372+
"""
292373
bins: list
293374
chunks: list
294375

@@ -298,7 +379,25 @@ def get_chunks_by_region(self, ref_n: cython.uint, beg: cython.uint, end: cython
298379

299380
@cython.ccall
300381
def get_chunks_by_list_of_regions(self, ref_n: cython.uint, regions: list) -> list:
301-
"""Return BGZF chunks overlapping any region in ``regions``."""
382+
"""Return BGZF chunks overlapping any region in ``regions``.
383+
384+
Args:
385+
ref_n: Reference index in the BAI.
386+
regions: Iterable of ``(beg, end)`` tuples.
387+
388+
Returns:
389+
list: Sorted list of BGZF chunk tuples covering the regions.
390+
391+
Examples:
392+
.. code-block:: python
393+
394+
from MACS3.IO.BAM import BAIFile
395+
bai = BAIFile("example.bam.bai")
396+
bai.open()
397+
bai.read()
398+
regions = [(1_000, 2_000), (50_000, 55_000)]
399+
chunks = bai.get_chunks_by_list_of_regions(ref_n=0, regions=regions)
400+
"""
302401
i: int
303402
temp_bins: list
304403
bins: list = []
@@ -314,7 +413,25 @@ def get_chunks_by_list_of_regions(self, ref_n: cython.uint, regions: list) -> li
314413

315414
@cython.ccall
316415
def get_coffset_by_region(self, ref_n: cython.uint, beg: cython.uint, end: cython.uint) -> cython.ulong:
317-
"""Return the BGZF compressed offset for the leftmost overlapping block."""
416+
"""Return the BGZF compressed offset for the leftmost overlapping block.
417+
418+
Args:
419+
ref_n: Reference index in the BAI.
420+
beg: start coordinate.
421+
end: end coordinate.
422+
423+
Returns:
424+
int: Compressed BGZF block offset, or 0 if no chunks overlap.
425+
426+
Examples:
427+
.. code-block:: python
428+
429+
from MACS3.IO.BAM import BAIFile
430+
bai = BAIFile("example.bam.bai")
431+
bai.open()
432+
bai.read()
433+
coffset = bai.get_coffset_by_region(ref_n=0, beg=1_000_000, end=1_010_000)
434+
"""
318435
voffset_tmp: cython.ulong
319436
coffset_tmp: cython.ulong
320437
chunks: list
@@ -334,7 +451,25 @@ def get_coffset_by_region(self, ref_n: cython.uint, beg: cython.uint, end: cytho
334451

335452
@cython.ccall
336453
def get_coffsets_by_list_of_regions(self, ref_n: cython.uint, regions: list) -> cython.ulong:
337-
"""Return compressed offsets for the leftmost block of each region."""
454+
"""Return compressed offsets for the leftmost block of each region.
455+
456+
Args:
457+
ref_n: Reference index in the BAI.
458+
regions: List of ``(beg, end)`` tuples.
459+
460+
Returns:
461+
list: Compressed offsets for each region, in input order.
462+
463+
Examples:
464+
.. code-block:: python
465+
466+
from MACS3.IO.BAM import BAIFile
467+
bai = BAIFile("example.bam.bai")
468+
bai.open()
469+
bai.read()
470+
regions = [(1_000, 2_000), (50_000, 55_000)]
471+
coffsets = bai.get_coffsets_by_list_of_regions(ref_n=0, regions=regions)
472+
"""
338473
beg: cython.uint
339474
end: cython.uint
340475
i: cython.int
@@ -357,6 +492,17 @@ class BAMaccessor:
357492
The accessor reads headers via gzip for compatibility, but seeks
358493
directly to BGZF blocks when fetching alignments for specific
359494
regions.
495+
496+
Attributes:
497+
bam_filename: Path to the BAM file.
498+
bai_filename: Path to ``.bai`` file.
499+
bamfile: BAM file handle, opened in ``"rb"`` mode.
500+
baifile: BAI file handler.
501+
references: Reference/chromosome names in BAM order.
502+
rlengths: Lengths of reference/chromosomes.
503+
bgzf_block_cache: Cache of decompressed bgzf_block.
504+
coffset_cache: coffset of the cached bgzf_block.
505+
noffset_cache: coffset of the next block of the cached bgzf_block.
360506
"""
361507
# all private
362508
bam_filename: str # BAM filename
@@ -392,7 +538,11 @@ def __init__(self, BAM_filename: str):
392538

393539
@cython.ccall
394540
def close(self):
395-
"""Close the underlying BAM stream."""
541+
"""Close the underlying BAM stream.
542+
543+
Returns:
544+
None
545+
"""
396546
self.bamfile.close()
397547

398548
@cython.cfunc
@@ -457,12 +607,20 @@ def __check_sorted(self, header: bytes) -> bool:
457607

458608
@cython.ccall
459609
def get_chromosomes(self) -> list:
460-
"""Return reference names in header order."""
610+
"""Return reference names in header order.
611+
612+
Returns:
613+
list: Reference/chromosome names, in header order.
614+
"""
461615
return self.references
462616

463617
@cython.ccall
464618
def get_rlengths(self) -> dict:
465-
"""Return reference lengths keyed by reference name."""
619+
"""Return reference lengths keyed by reference name.
620+
621+
Returns:
622+
dict: Mapping of reference name to length.
623+
"""
466624
return self.rlengths
467625

468626
@cython.ccall
@@ -485,13 +643,24 @@ def __decode_voffset(self, voffset: cython.ulong) -> tuple:
485643

486644
@cython.ccall
487645
def __seek(self, offset: cython.ulong) -> bool:
488-
"""Seek to a compressed BGZF block offset within the BAM file."""
646+
"""Seek to a compressed BGZF block offset within the BAM file.
647+
648+
Args:
649+
offset: Compressed BGZF block offset.
650+
651+
Returns:
652+
bool: ``True`` after seeking.
653+
"""
489654
self.bamfile.seek(offset, 0)
490655
return True
491656

492657
@cython.ccall
493658
def __retrieve_cdata_from_bgzf_block(self) -> bool:
494-
"""Decompress the next BGZF block and cache the uncompressed payload."""
659+
"""Decompress the next BGZF block and cache the uncompressed payload.
660+
661+
Returns:
662+
bool: ``True`` after updating the cache.
663+
"""
495664
xlen: cython.ushort
496665
bsize: cython.ushort
497666
extra: bytes

MACS3/IO/BedGraphIO.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,13 @@ def read_bedGraph(self, baseline_value: cython.double = 0):
6565
6666
Returns:
6767
bedGraphTrackI: Populated track instance.
68+
69+
Examples:
70+
.. code-block:: python
71+
72+
from MACS3.IO.BedGraphIO import bedGraphIO
73+
bio = bedGraphIO("signal.bdg")
74+
track = bio.read_bedGraph(baseline_value=0)
6875
"""
6976
i: bytes
7077

@@ -96,6 +103,13 @@ def write_bedGraph(self, name: str = "", description: str = "",
96103
name: Track name used in the optional header line.
97104
description: Track description used in the optional header line.
98105
trackline: Whether to emit a UCSC ``track`` header.
106+
107+
Examples:
108+
.. code-block:: python
109+
110+
from MACS3.IO.BedGraphIO import bedGraphIO
111+
bio = bedGraphIO("signal_out.bdg")
112+
bio.write_bedGraph(name="Example", description="Demo track")
99113
"""
100114
pre: cython.int
101115
pos: cython.int

0 commit comments

Comments
 (0)