Skip to content

Commit 5b8a81d

Browse files
committed
increase max count in frag file to 65535, give warning if count is over the limit while parsing the file
1 parent 8a833cf commit 5b8a81d

8 files changed

Lines changed: 44 additions & 25 deletions

File tree

MACS3/IO/Parser.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# cython: language_level=3
22
# cython: profile=True
33
# cython: linetrace=True
4-
# Time-stamp: <2025-02-15 08:24:53 Tao Liu>
4+
# Time-stamp: <2025-04-11 13:41:46 Tao Liu>
55

66
"""Module for all MACS Parser classes for input. Please note that the
77
parsers are for reading the alignment files ONLY.
@@ -34,7 +34,7 @@
3434
logger = logging.getLogger(__name__)
3535
debug = logger.debug
3636
info = logger.info
37-
37+
warn = logger.warn
3838
# ------------------------------------
3939
# Other modules
4040
# ------------------------------------
@@ -1513,17 +1513,23 @@ def pe_parse_line(self, thisline: bytes):
15131513
15141514
"""
15151515
thisfields: list
1516+
thiscount: cython.ushort
15161517

15171518
thisline = thisline.rstrip()
15181519

15191520
# still only support tabular as delimiter.
15201521
thisfields = thisline.split(b'\t')
15211522
try:
1523+
try:
1524+
thiscount = atoi(thisfields[4])
1525+
except OverflowError:
1526+
thiscount = 65535
1527+
warn(f"The count in this line is over 65535, and will be capped at 65535: {thisline}")
15221528
return (thisfields[0],
15231529
atoi(thisfields[1]),
15241530
atoi(thisfields[2]),
15251531
thisfields[3],
1526-
atoi(thisfields[4]))
1532+
thiscount)
15271533
except IndexError:
15281534
raise Exception("Less than 5 columns found at this line: %s\n" %
15291535
thisline)
@@ -1537,7 +1543,7 @@ def build_petrack(self, max_count=0):
15371543
left_pos: cython.int
15381544
right_pos: cython.int
15391545
barcode: bytes
1540-
count: cython.uchar
1546+
count: cython.ushort
15411547
i: cython.long = 0 # number of fragments
15421548
m: cython.long = 0 # sum of fragment lengths
15431549
tmp: bytes = b""
@@ -1591,7 +1597,7 @@ def append_petrack(self, petrack, max_count=0):
15911597
left_pos: cython.int
15921598
right_pos: cython.int
15931599
barcode: bytes
1594-
count: cython.uchar
1600+
count: cython.ushort
15951601
i: cython.long = 0 # number of fragments
15961602
m: cython.long = 0 # sum of fragment lengths
15971603
tmp: bytes = b""

MACS3/Signal/PairedEndTrack.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# cython: language_level=3
22
# cython: profile=True
3-
# Time-stamp: <2025-02-15 08:49:30 Tao Liu>
3+
# Time-stamp: <2025-04-11 13:24:36 Tao Liu>
44

55
"""Module for filter duplicate tags from paired-end data
66
@@ -779,7 +779,7 @@ class PETrackII:
779779

780780
def __init__(self, anno: str = "", buffer_size: cython.long = 100000):
781781
# dictionary with chrname as key, nparray with
782-
# [('l','i4'),('r','i4'),('c','u1')] as value
782+
# [('l','i4'),('r','i4'),('c','u2')] as value
783783
self.locations = {}
784784
# dictionary with chrname as key, size of the above nparray as value
785785
# size is to remember the size of the fragments added to this chromosome
@@ -805,7 +805,7 @@ def add_loc(self,
805805
start: cython.int,
806806
end: cython.int,
807807
barcode: bytes,
808-
count: cython.uchar):
808+
count: cython.ushort):
809809
"""Add a location to the list according to the sequence name.
810810
811811
chromosome: mostly the chromosome name
@@ -828,7 +828,7 @@ def add_loc(self,
828828
# note: ['l'] is the leftmost end, ['r'] is the rightmost end of fragment.
829829
# ['c'] is the count number of this fragment
830830
self.locations[chromosome] = np.zeros(shape=self.buffer_size,
831-
dtype=[('l', 'i4'), ('r', 'i4'), ('c', 'u1')])
831+
dtype=[('l', 'i4'), ('r', 'i4'), ('c', 'u2')])
832832
self.barcodes[chromosome] = np.zeros(shape=self.buffer_size,
833833
dtype='i4')
834834
self.locations[chromosome][0] = (start, end, count)

MACS3/Signal/PileupV2.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# cython: language_level=3
22
# cython: profile=True
3-
# Time-stamp: <2025-02-14 11:56:44 Tao Liu>
3+
# Time-stamp: <2025-04-11 13:26:28 Tao Liu>
44

55
"""Module Description:
66
@@ -112,7 +112,7 @@ def make_PV_from_LRC(LRC_array: cnp.ndarray,
112112
`mapping_func( L, R )` or simply 1 if mapping_func is the default.
113113
114114
LRC array is an np.ndarray as with dtype
115-
[('l','i4'),('r','i4'),('c','u1')] with length of N
115+
[('l','i4'),('r','i4'),('c','u2')] with length of N
116116
117117
PV array is an np.ndarray with
118118
dtype=[('p','i4'),('v','f4')] with length of 2N
@@ -122,7 +122,7 @@ def make_PV_from_LRC(LRC_array: cnp.ndarray,
122122
i: cython.ulong
123123
L: cython.int
124124
R: cython.int
125-
C: cython.uchar
125+
C: cython.ushort
126126
PV: cnp.ndarray
127127

128128
l_LRC = LRC_array.shape[0]
@@ -292,7 +292,7 @@ def pileup_from_LRC(LRC_array: cnp.ndarray,
292292
293293
User needs to provide a numpy array of left and right positions
294294
and the counts, with
295-
dtype=[('l','i4'),('r','i4'),('c','u1')]. User also needs to
295+
dtype=[('l','i4'),('r','i4'),('c','u2')]. User also needs to
296296
provide a mapping function to map the left and right position to
297297
certain weight.
298298

docs/source/docs/bdgdiff.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,14 +30,16 @@ condition B while comparing treatment B over control B, and also the
3030
likelihood test while comparing treatment A and treatment B can't
3131
decide which condition is stronger.
3232

33+
34+
3335
The likelihood function we used while comparing two conditions: ChIP
3436
(enrichment) or control (chromatin bias) is:
3537

3638
$$ln(LR) = x*(ln(x)-ln(y)) + y - x$$
3739

3840
Here $LR$ is the likelihood ratio, x is the signal (fragment pileup)
3941
we observed in condition 1, and y is the signal in condition
40-
2. And $ln$ is the natural logarithm.
42+
2. And $ln$ is the natural logarithm.
4143

4244
Note: All regions on the same chromosome in the bedGraph file should
4345
be continuous so only bedGraph files from MACS3 are acceptable.

docs/source/docs/callpeak.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,24 @@ invoked by `macs3 callpeak` . If you type this command with `-h`, you
88
will see a full description of command-line options. Here we only list
99
the essentials.
1010

11+
## Examples of Commandline Usage
12+
1. Peak calling for regular TF ChIP-seq:
13+
$ macs3 callpeak -t ChIP.bam -c Control.bam -f BAM -g hs -n test -B -q 0.01
14+
2. Broad peak calling on Histone Mark ChIP-seq:
15+
$ macs3 callpeak -t ChIP.bam -c Control.bam --broad -g hs --broad-cutoff 0.1
16+
3. Peak calling on ATAC-seq (paired-end mode):
17+
$ macs3 callpeak -f BAMPE -t ATAC.bam -g hs -n test -B -q 0.01
18+
4. Peak calling on ATAC-seq (focusing on insertion sites, and using single-end mode):
19+
$ macs3 callpeak -f BAM -t ATAC.bam -g hs -n test -B -q 0.01 --shift -50 --extension 100
20+
5. Peak calling on scATAC-seq (paired-end mode):
21+
$ macs3 callpeak -f BEDPE -t scATAC.fragments.tsv.gz -g hs -n test -B -q 0.01 -n test
22+
6. Peak calling on scATAC-seq (paired-end mode):
23+
$ macs3 callpeak -f FRAG -t scATAC.fragments.tsv.gz -g hs -n test -B -q 0.01 -n test
24+
7. Peak calling on scATAC-seq (paired-end mode) and only for given barcodes:
25+
$ macs3 callpeak -f FRAG -t scATAC.fragments.tsv.gz -g hs -n test -B -q 0.01 -n test --barcodes barcodes.txt
26+
27+
28+
1129
## Essential Commandline Options
1230

1331
### Input and Output

test/test_PairedEndTrack.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#!/usr/bin/env python
2-
# Time-stamp: <2025-02-12 13:44:51 Tao Liu>
2+
# Time-stamp: <2025-04-11 13:42:39 Tao Liu>
33

44
import unittest
55
from MACS3.Signal.PairedEndTrack import PETrackI, PETrackII
@@ -96,7 +96,6 @@ def setUp(self):
9696
(b"chrY", 50, 160, b"0w#AAACGAACAAGTAAGA", 2), # will be excluded
9797
(b"chrY", 100, 170, b"0w#AAACGAACAAGTAAGA", 3)
9898
]
99-
10099
self.exclude_regions = [(b"chrY", 10, 75)]
101100
self.x_regions = Regions()
102101
for r in self.exclude_regions:

test/test_Parser.py

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#!/usr/bin/env python
2-
# Time-stamp: <2025-02-15 08:23:55 Tao Liu>
2+
# Time-stamp: <2025-04-11 13:46:05 Tao Liu>
33

44
import unittest
55

@@ -40,15 +40,9 @@ def setUp(self):
4040
self.bedpefile = "test/tiny.bedpe.gz"
4141
self.samfile = "test/tiny.sam.gz"
4242
self.bamfile = "test/tiny.bam"
43-
self.fragfile = "test/test.fragments.tsv.gz"
44-
43+
self.fragfile = "test/tiny.frag.tsv.gz"
44+
4545
def test_fragment_file(self):
4646
p = FragParser(self.fragfile)
4747
petrack = p.build_petrack()
4848
petrack.finalize()
49-
bdg = petrack.pileup_bdg()
50-
bdg2 = petrack.pileup_bdg2()
51-
peaks = bdg.call_peaks(cutoff=10, min_length=200, max_gap=100)
52-
peaks2 = bdg2.call_peaks(cutoff=10, min_length=200, max_gap=100)
53-
# print(peaks)
54-
# print(peaks2)

test/tiny.frag.tsv.gz

104 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)