Skip to content

Commit 66196c5

Browse files
authored
Merge pull request #322 from maxibor/strip_fastq
Move extract map reads fastq compression to pigz
2 parents d8ccb82 + f332e4d commit 66196c5

3 files changed

Lines changed: 28 additions & 53 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
3131
* Fixed Travis-Ci.org to Travis-Ci.com migration issues
3232
* [#266](https://github.com/nf-core/eager/issues/266) - Added sanity checks for input filetypes (i.e. only BAM files can be supplied if `--bam`)
3333
* [#237](https://github.com/nf-core/eager/issues/237) - Fixed and Updated script scrape_software_versions
34+
* [#322](https://github.com/nf-core/eager/pull/322) - Moved FASTQ compression of `extract_map_reads.py` output to pigz
3435

3536
### `Dependencies`
3637

bin/extract_map_reads.py

Lines changed: 21 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -179,54 +179,25 @@ def write_fq(fq_dict, fname, mode):
179179
- fname(string) Path to output fastq file
180180
- mode(string) strip (remove read) or replace (replace read sequence) by Ns
181181
"""
182-
183-
if fname.endswith('.gz'):
184-
with gzip.open(fname, 'wb') as f:
185-
for k in list(fq_dict.keys()):
186-
if mode == 'strip':
187-
# if unmapped, write all the read lines
188-
if fq_dict[k][0] == 'u':
189-
f.write(f"@{k}\n".encode())
190-
for i in fq_dict[k][1:]:
191-
f.write(f"{i}\n".encode())
192-
# if mapped, do not write the read lines
193-
elif fq_dict[k][0] == 'm':
194-
continue
195-
196-
elif mode == 'replace':
197-
# if unmapped, write all the read lines
198-
if fq_dict[k][0] == 'u':
199-
f.write(f"@{k}\n".encode())
200-
for i in fq_dict[k][1:]:
201-
f.write(f"{i}\n".encode())
202-
# if mapped, write all the read lines, but replace sequence
203-
# by N*(len(sequence))
204-
elif fq_dict[k][0] == 'm':
205-
f.write(f"@{k}\n".encode())
206-
f.write(f"{'N'*len(fq_dict[k][1])}\n".encode())
207-
for i in fq_dict[k][2:]:
208-
f.write(f"{i}\n".encode())
209-
210-
else:
211-
with open(fname, 'w') as f:
212-
for k in list(fq_dict.keys()):
213-
if mode == 'strip':
214-
if fq_dict[k][0] == 'u':
215-
f.write(f"@{k}\n")
216-
for i in fq_dict[k][1:]:
217-
f.write(f"{i}\n")
218-
elif fq_dict[k][0] == 'm':
219-
continue
220-
elif mode == 'replace':
221-
if fq_dict[k][0] == 'u':
222-
f.write(f"@{k}\n")
223-
for i in fq_dict[k][1:]:
224-
f.write(f"{i}\n")
225-
elif fq_dict[k][0] == 'm':
226-
f.write(f"@{k}\n")
227-
f.write(f"{'N'*len(fq_dict[k][1])}\n")
228-
for i in fq_dict[k][2:]:
229-
f.write(f"{i}\n")
182+
with open(fname, 'w') as f:
183+
for k in list(fq_dict.keys()):
184+
if mode == 'strip':
185+
if fq_dict[k][0] == 'u':
186+
f.write(f"@{k}\n")
187+
for i in fq_dict[k][1:]:
188+
f.write(f"{i}\n")
189+
elif fq_dict[k][0] == 'm':
190+
continue
191+
elif mode == 'replace':
192+
if fq_dict[k][0] == 'u':
193+
f.write(f"@{k}\n")
194+
for i in fq_dict[k][1:]:
195+
f.write(f"{i}\n")
196+
elif fq_dict[k][0] == 'm':
197+
f.write(f"@{k}\n")
198+
f.write(f"{'N'*len(fq_dict[k][1])}\n")
199+
for i in fq_dict[k][2:]:
200+
f.write(f"{i}\n")
230201

231202

232203
def check_strip_mode(mode):
@@ -238,7 +209,7 @@ def check_strip_mode(mode):
238209
BAM, IN_FWD, IN_REV, OUT_FWD, OUT_REV, MODE, PROC = _get_args()
239210

240211
if OUT_FWD == None:
241-
out_fwd = f"{IN_FWD.split('/')[-1].split('.')[0]}.r1.fq.gz"
212+
out_fwd = f"{IN_FWD.split('/')[-1].split('.')[0]}.r1.fq"
242213
else:
243214
out_fwd = OUT_FWD
244215

@@ -248,7 +219,7 @@ def check_strip_mode(mode):
248219
write_fq(fwd_reads, out_fwd, MODE)
249220
if IN_REV:
250221
if OUT_REV == None:
251-
out_rev = f"{IN_REV.split('/')[-1].split('.')[0]}.r2.fq.gz"
222+
out_rev = f"{IN_REV.split('/')[-1].split('.')[0]}.r2.fq"
252223
else:
253224
out_rev = OUT_REV
254225
rev_dict = parse_fq(IN_REV)

main.nf

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1313,17 +1313,20 @@ process strip_input_fastq {
13131313

13141314
script:
13151315
if (params.singleEnd) {
1316-
out_fwd = bam.baseName+'.stripped.fq.gz'
1316+
out_fwd = bam.baseName+'.stripped.fq'
13171317
"""
13181318
samtools index $bam
13191319
extract_map_reads.py $bam ${fq[0]} -m ${params.strip_mode} -of $out_fwd -p ${task.cpus}
1320+
pigz -p ${task.cpus} $out_fwd
13201321
"""
13211322
} else {
1322-
out_fwd = bam.baseName+'.stripped.fwd.fq.gz'
1323-
out_rev = bam.baseName+'.stripped.rev.fq.gz'
1323+
out_fwd = bam.baseName+'.stripped.fwd.fq'
1324+
out_rev = bam.baseName+'.stripped.rev.fq'
13241325
"""
13251326
samtools index $bam
13261327
extract_map_reads.py $bam ${fq[0]} -rev ${fq[1]} -m ${params.strip_mode} -of $out_fwd -or $out_rev -p ${task.cpus}
1328+
pigz -p ${task.cpus} $out_fwd
1329+
pigz -p ${task.cpus} $out_rev
13271330
"""
13281331
}
13291332

0 commit comments

Comments
 (0)