@@ -144,7 +144,19 @@ def __str__(self):
144144
145145@cython .cclass
146146class BAIFile :
147- """In-memory representation of a BAM index (BAI) file."""
147+ """In-memory representation of a BAM index (BAI) file.
148+
149+ Attributes:
150+ filename: Path to the ``.bai`` file.
151+ magic: File magic header (should be ``b"BAI\\1"``).
152+ n_ref: Number of reference sequences.
153+ metadata: Per-reference pseudo-bin metadata, indexed by reference number.
154+ n_bins: Total number of bins across references.
155+ n_chunks: Total number of chunks across references.
156+ n_mapped: Total mapped reads.
157+ n_unmapped: Total unmapped reads.
158+ bins: Per-reference mappings from bin ID to that bin's chunks, indexed by reference number.
159+ """
148160 filename : str # filename
149161 fhd : object # file handler
150162 magic : bytes # magic code for this file
@@ -266,12 +278,47 @@ def __load_bins(self):
266278
267279 @cython .ccall
268280 def get_chunks_by_bin (self , ref_n : cython .uint , bin_n : cython .uint ) -> list :
269- """Return sorted BGZF chunks for ``bin_n`` on reference ``ref_n``."""
281+ """Return sorted BGZF chunks for ``bin_n`` on reference ``ref_n``.
282+
283+ Args:
284+ ref_n: Reference index in the BAI.
285+ bin_n: Bin identifier.
286+
287+ Returns:
288+ list: Sorted list of BGZF chunks for the bin.
289+
290+ Examples:
291+ .. code-block:: python
292+
293+ from MACS3.IO.BAM import BAIFile
294+ bai = BAIFile("example.bam.bai")
295+ bai.open()
296+ bai.read()
297+ chunks = bai.get_chunks_by_bin(ref_n=0, bin_n=4681)
298+ """
270299 return sorted (self .bins [ref_n ].get (bin_n , []))
271300
272301 @cython .ccall
273302 def get_chunks_by_list_of_bins (self , ref_n : cython .uint , bins : list ) -> list :
274- """Return sorted chunks for the unique set of bins provided."""
303+ """Return sorted chunks for the unique set of bins provided.
304+
305+ Args:
306+ ref_n: Reference index in the BAI.
307+ bins: List of bin identifiers; duplicate identifiers are only considered once.
308+
309+ Returns:
310+ list: sorted list of BGZF chunks for the bins.
311+
312+ Examples:
313+ .. code-block:: python
314+
315+ from MACS3.IO.BAM import BAIFile
316+ bai = BAIFile("example.bam.bai")
317+ bai.open()
318+ bai.read()
319+ bins = [4681, 4682, 585]
320+ chunks = bai.get_chunks_by_list_of_bins(ref_n=0, bins=bins)
321+ """
275322 bin_n : cython .uint
276323 chunks : list = []
277324 bin_set : set
@@ -283,12 +330,46 @@ def get_chunks_by_list_of_bins(self, ref_n: cython.uint, bins: list) -> list:
283330
284331 @cython .ccall
285332 def get_metadata_by_refseq (self , ref_n : cython .uint ) -> dict :
286- """Return pseudo-bin metadata for reference ``ref_n``."""
333+ """Return pseudo-bin metadata for reference ``ref_n``.
334+
335+ Args:
336+ ref_n: Reference index in the BAI.
337+
338+ Returns:
339+ dict: Metadata for the reference.
340+
341+ Examples:
342+ .. code-block:: python
343+
344+ from MACS3.IO.BAM import BAIFile
345+ bai = BAIFile("example.bam.bai")
346+ bai.open()
347+ bai.read()
348+ meta = bai.get_metadata_by_refseq(ref_n=0)
349+ """
287350 return self .metadata [ref_n ]
288351
289352 @cython .ccall
290353 def get_chunks_by_region (self , ref_n : cython .uint , beg : cython .uint , end : cython .uint ) -> list :
291- """Return BGZF chunks overlapping ``[beg, end)`` on reference ``ref_n``."""
354+ """Return BGZF chunks overlapping ``[beg, end)`` on reference ``ref_n``.
355+
356+ Args:
357+ ref_n: Reference index in the BAI.
358+ beg: Start coordinate of the region (inclusive).
359+ end: End coordinate of the region (exclusive).
360+
361+ Returns:
362+ list: Sorted list of BGZF chunks covering the region.
363+
364+ Examples:
365+ .. code-block:: python
366+
367+ from MACS3.IO.BAM import BAIFile
368+ bai = BAIFile("example.bam.bai")
369+ bai.open()
370+ bai.read()
371+ chunks = bai.get_chunks_by_region(ref_n=0, beg=1_000_000, end=1_010_000)
372+ """
292373 bins : list
293374 chunks : list
294375
@@ -298,7 +379,25 @@ def get_chunks_by_region(self, ref_n: cython.uint, beg: cython.uint, end: cython
298379
299380 @cython .ccall
300381 def get_chunks_by_list_of_regions (self , ref_n : cython .uint , regions : list ) -> list :
301- """Return BGZF chunks overlapping any region in ``regions``."""
382+ """Return BGZF chunks overlapping any region in ``regions``.
383+
384+ Args:
385+ ref_n: Reference index in the BAI.
386+ regions: List of ``(beg, end)`` tuples.
387+
388+ Returns:
389+ list: Sorted list of BGZF chunk tuples covering the regions.
390+
391+ Examples:
392+ .. code-block:: python
393+
394+ from MACS3.IO.BAM import BAIFile
395+ bai = BAIFile("example.bam.bai")
396+ bai.open()
397+ bai.read()
398+ regions = [(1_000, 2_000), (50_000, 55_000)]
399+ chunks = bai.get_chunks_by_list_of_regions(ref_n=0, regions=regions)
400+ """
302401 i : int
303402 temp_bins : list
304403 bins : list = []
@@ -314,7 +413,25 @@ def get_chunks_by_list_of_regions(self, ref_n: cython.uint, regions: list) -> li
314413
315414 @cython .ccall
316415 def get_coffset_by_region (self , ref_n : cython .uint , beg : cython .uint , end : cython .uint ) -> cython .ulong :
317- """Return the BGZF compressed offset for the leftmost overlapping block."""
416+ """Return the BGZF compressed offset for the leftmost overlapping block.
417+
418+ Args:
419+ ref_n: Reference index in the BAI.
420+ beg: start coordinate.
421+ end: end coordinate.
422+
423+ Returns:
424+ int: Compressed BGZF block offset, or 0 if no chunks overlap.
425+
426+ Examples:
427+ .. code-block:: python
428+
429+ from MACS3.IO.BAM import BAIFile
430+ bai = BAIFile("example.bam.bai")
431+ bai.open()
432+ bai.read()
433+ coffset = bai.get_coffset_by_region(ref_n=0, beg=1_000_000, end=1_010_000)
434+ """
318435 voffset_tmp : cython .ulong
319436 coffset_tmp : cython .ulong
320437 chunks : list
@@ -334,7 +451,25 @@ def get_coffset_by_region(self, ref_n: cython.uint, beg: cython.uint, end: cytho
334451
335452 @cython .ccall
336453 def get_coffsets_by_list_of_regions (self , ref_n : cython .uint , regions : list ) -> cython .ulong :
337- """Return compressed offsets for the leftmost block of each region."""
454+ """Return compressed offsets for the leftmost block of each region.
455+
456+ Args:
457+ ref_n: Reference index in the BAI.
458+ regions: List of ``(beg, end)`` tuples.
459+
460+ Returns:
461+ list: Compressed offsets for each region, in input order.
462+
463+ Examples:
464+ .. code-block:: python
465+
466+ from MACS3.IO.BAM import BAIFile
467+ bai = BAIFile("example.bam.bai")
468+ bai.open()
469+ bai.read()
470+ regions = [(1_000, 2_000), (50_000, 55_000)]
471+ coffsets = bai.get_coffsets_by_list_of_regions(ref_n=0, regions=regions)
472+ """
338473 beg : cython .uint
339474 end : cython .uint
340475 i : cython .int
@@ -357,6 +492,17 @@ class BAMaccessor:
357492 The accessor reads headers via gzip for compatibility, but seeks
358493 directly to BGZF blocks when fetching alignments for specific
359494 regions.
495+
496+ Attributes:
497+ bam_filename: Path to the BAM file.
498+ bai_filename: Path to ``.bai`` file.
499+ bamfile: BAM file handler "rb" mode.
500+ baifile: BAI file handler.
501+ references: Reference/chromosome names in BAM order.
502+ rlengths: Lengths of reference/chromosomes.
503+ bgzf_block_cache: Cache of the most recently decompressed BGZF block.
504+ coffset_cache: Compressed offset (coffset) of the cached BGZF block.
505+ noffset_cache: Compressed offset (coffset) of the block that follows the cached BGZF block.
360506 """
361507 # all private
362508 bam_filename : str # BAM filename
@@ -392,7 +538,11 @@ def __init__(self, BAM_filename: str):
392538
393539 @cython .ccall
394540 def close (self ):
395- """Close the underlying BAM stream."""
541+ """Close the underlying BAM stream.
542+
543+ Returns:
544+ None
545+ """
396546 self .bamfile .close ()
397547
398548 @cython .cfunc
@@ -457,12 +607,20 @@ def __check_sorted(self, header: bytes) -> bool:
457607
458608 @cython .ccall
459609 def get_chromosomes (self ) -> list :
460- """Return reference names in header order."""
610+ """Return reference names in header order.
611+
612+ Returns:
613+ list: Reference/chromosome names in header order.
614+ """
461615 return self .references
462616
463617 @cython .ccall
464618 def get_rlengths (self ) -> dict :
465- """Return reference lengths keyed by reference name."""
619+ """Return reference lengths keyed by reference name.
620+
621+ Returns:
622+ dict: Mapping of reference name to length.
623+ """
466624 return self .rlengths
467625
468626 @cython .ccall
@@ -485,13 +643,24 @@ def __decode_voffset(self, voffset: cython.ulong) -> tuple:
485643
486644 @cython .ccall
487645 def __seek (self , offset : cython .ulong ) -> bool :
488- """Seek to a compressed BGZF block offset within the BAM file."""
646+ """Seek to a compressed BGZF block offset within the BAM file.
647+
648+ Args:
649+ offset: Compressed BGZF block offset.
650+
651+ Returns:
652+ bool: ``True`` after seeking.
653+ """
489654 self .bamfile .seek (offset , 0 )
490655 return True
491656
492657 @cython .ccall
493658 def __retrieve_cdata_from_bgzf_block (self ) -> bool :
494- """Decompress the next BGZF block and cache the uncompressed payload."""
659+ """Decompress the next BGZF block and cache the uncompressed payload.
660+
661+ Returns:
662+ bool: ``True`` after updating the cache.
663+ """
495664 xlen : cython .ushort
496665 bsize : cython .ushort
497666 extra : bytes
0 commit comments