Skip to content

Commit 249d5f4

Browse files
authored
Merge pull request #544 from maxibor/dev
Add bam filtering on fragment length script
2 parents baa0d60 + a40ae71 commit 249d5f4

2 files changed

Lines changed: 83 additions & 0 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
3232
* [#512](https://github.com/nf-core/eager/issues/512) Added flexible trimming of bams by library type. 'half' and 'none' UDG libraries can now be trimmed differentially within a single eager run.
3333
* Added a `.dockstore.yml` config file for automatic workflow registration with [dockstore.org](https://dockstore.org/)
3434
* Updated template to nf-core/tools 1.10.2
35+
* [#544](https://github.com/nf-core/eager/pull/544) Add script to perform bam filtering on fragment length
3536

3637
### `Fixed`
3738

bin/filter_bam_fragment_length.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
#!/usr/bin/env python3
2+
3+
import argparse
4+
import pysam
5+
6+
7+
def get_args(argv=None):
    """Parse the command-line arguments of the bam fragment-length filter.

    Args:
        argv (list of str, optional): Argument strings to parse instead of
            the process command line. When None (the default) argparse
            falls back to ``sys.argv[1:]``.

    Returns:
        tuple: ``(bam, fraglen, allreads, outfile)`` where ``bam`` is the
        positional input path, ``fraglen`` the integer given with ``-l``
        (35 if omitted), ``allreads`` is True when ``-a`` is given, and
        ``outfile`` is the ``-o`` value or None if omitted.
    """
    parser = argparse.ArgumentParser(
        prog="bam_filter", description="Filter bam on fragment length"
    )
    parser.add_argument("bam", help="Bam alignment file")
    parser.add_argument(
        "-l",
        dest="fraglen",
        default=35,
        type=int,
        help="Minimum fragment length. Default = 35",
    )
    parser.add_argument(
        "-a",
        dest="all",
        default=False,
        action="store_true",
        help="Include all reads, even unmapped",
    )
    parser.add_argument(
        "-o",
        dest="output",
        default=None,
        help="Output bam basename. Default = {bam_basename}.filtered.bam",
    )

    args = parser.parse_args(argv)

    return (args.bam, args.fraglen, args.all, args.output)
42+
43+
44+
def getBasename(file_name):
    """Return the final "/"-separated component of file_name, cut at its first "."."""
    # rsplit leaves the string untouched when it contains no "/", so a
    # single expression covers both the path and the bare-name cases.
    last_component = file_name.rsplit("/", 1)[-1]
    stem, _, _ = last_component.partition(".")
    return stem
50+
51+
52+
def filter_bam(infile, outfile, fraglen, allreads):
    """Filter a bam on read length and write the kept reads to a new bam.

    Args:
        infile (str): Input bam, as passed to pysam.AlignmentFile ("rb" mode)
        outfile (str): Output basename; the file written is
            outfile + ".filtered.bam"
        fraglen (int): Minimum read.query_length for a read to be kept
        allreads (bool): If True, every read meeting the length threshold is
            written; otherwise reads flagged as unmapped are dropped as well
    """
    # Context managers guarantee that both handles are closed, even if
    # reading or writing raises part-way through the file.
    with pysam.AlignmentFile(infile, "rb") as bamfile, pysam.AlignmentFile(
        outfile + ".filtered.bam", "wb", template=bamfile
    ) as bamwrite:
        # until_eof=True iterates the file in order without needing an index.
        for read in bamfile.fetch(until_eof=True):
            if read.query_length < fraglen:
                continue
            if allreads or not read.is_unmapped:
                bamwrite.write(read)
71+
72+
73+
if __name__ == "__main__":
    BAM, FRAGLEN, ALLREADS, OUTFILE = get_args()

    # No -o given: derive the output basename from the input file name.
    if OUTFILE is None:
        OUTFILE = getBasename(BAM)

    # filter_bam opens (and reads) the input itself, so no separate
    # AlignmentFile handle is opened here.
    filter_bam(BAM, OUTFILE, FRAGLEN, ALLREADS)
82+

0 commit comments

Comments
 (0)