Skip to content

Commit 4874301

Browse files
committed
move extract map reads fastq compression to pigz
1 parent 5832685 commit 4874301

2 files changed

Lines changed: 27 additions & 53 deletions

File tree

bin/extract_map_reads.py

Lines changed: 21 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -179,54 +179,25 @@ def write_fq(fq_dict, fname, mode):
179179
- fname(string) Path to output fastq file
180180
- mode(string) strip (remove read) or replace (replace read sequence by Ns)
181181
"""
182-
183-
if fname.endswith('.gz'):
184-
with gzip.open(fname, 'wb') as f:
185-
for k in list(fq_dict.keys()):
186-
if mode == 'strip':
187-
# if unmapped, write all the read lines
188-
if fq_dict[k][0] == 'u':
189-
f.write(f"@{k}\n".encode())
190-
for i in fq_dict[k][1:]:
191-
f.write(f"{i}\n".encode())
192-
# if mapped, do not write the read lines
193-
elif fq_dict[k][0] == 'm':
194-
continue
195-
196-
elif mode == 'replace':
197-
# if unmapped, write all the read lines
198-
if fq_dict[k][0] == 'u':
199-
f.write(f"@{k}\n".encode())
200-
for i in fq_dict[k][1:]:
201-
f.write(f"{i}\n".encode())
202-
# if mapped, write all the read lines, but replace sequence
203-
# by N*(len(sequence))
204-
elif fq_dict[k][0] == 'm':
205-
f.write(f"@{k}\n".encode())
206-
f.write(f"{'N'*len(fq_dict[k][1])}\n".encode())
207-
for i in fq_dict[k][2:]:
208-
f.write(f"{i}\n".encode())
209-
210-
else:
211-
with open(fname, 'w') as f:
212-
for k in list(fq_dict.keys()):
213-
if mode == 'strip':
214-
if fq_dict[k][0] == 'u':
215-
f.write(f"@{k}\n")
216-
for i in fq_dict[k][1:]:
217-
f.write(f"{i}\n")
218-
elif fq_dict[k][0] == 'm':
219-
continue
220-
elif mode == 'replace':
221-
if fq_dict[k][0] == 'u':
222-
f.write(f"@{k}\n")
223-
for i in fq_dict[k][1:]:
224-
f.write(f"{i}\n")
225-
elif fq_dict[k][0] == 'm':
226-
f.write(f"@{k}\n")
227-
f.write(f"{'N'*len(fq_dict[k][1])}\n")
228-
for i in fq_dict[k][2:]:
229-
f.write(f"{i}\n")
182+
with open(fname, 'w') as f:
183+
for k in list(fq_dict.keys()):
184+
if mode == 'strip':
185+
if fq_dict[k][0] == 'u':
186+
f.write(f"@{k}\n")
187+
for i in fq_dict[k][1:]:
188+
f.write(f"{i}\n")
189+
elif fq_dict[k][0] == 'm':
190+
continue
191+
elif mode == 'replace':
192+
if fq_dict[k][0] == 'u':
193+
f.write(f"@{k}\n")
194+
for i in fq_dict[k][1:]:
195+
f.write(f"{i}\n")
196+
elif fq_dict[k][0] == 'm':
197+
f.write(f"@{k}\n")
198+
f.write(f"{'N'*len(fq_dict[k][1])}\n")
199+
for i in fq_dict[k][2:]:
200+
f.write(f"{i}\n")
230201

231202

232203
def check_strip_mode(mode):
@@ -238,7 +209,7 @@ def check_strip_mode(mode):
238209
BAM, IN_FWD, IN_REV, OUT_FWD, OUT_REV, MODE, PROC = _get_args()
239210

240211
if OUT_FWD == None:
241-
out_fwd = f"{IN_FWD.split('/')[-1].split('.')[0]}.r1.fq.gz"
212+
out_fwd = f"{IN_FWD.split('/')[-1].split('.')[0]}.r1.fq"
242213
else:
243214
out_fwd = OUT_FWD
244215

@@ -248,7 +219,7 @@ def check_strip_mode(mode):
248219
write_fq(fwd_reads, out_fwd, MODE)
249220
if IN_REV:
250221
if OUT_REV == None:
251-
out_rev = f"{IN_REV.split('/')[-1].split('.')[0]}.r2.fq.gz"
222+
out_rev = f"{IN_REV.split('/')[-1].split('.')[0]}.r2.fq"
252223
else:
253224
out_rev = OUT_REV
254225
rev_dict = parse_fq(IN_REV)

main.nf

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1292,17 +1292,20 @@ process strip_input_fastq {
12921292

12931293
script:
12941294
if (params.singleEnd) {
1295-
out_fwd = bam.baseName+'.stripped.fq.gz'
1295+
out_fwd = bam.baseName+'.stripped.fq'
12961296
"""
12971297
samtools index $bam
12981298
extract_map_reads.py $bam ${fq[0]} -m ${params.strip_mode} -of $out_fwd -p ${task.cpus}
1299+
pigz -p ${task.cpus} $out_fwd
12991300
"""
13001301
} else {
1301-
out_fwd = bam.baseName+'.stripped.fwd.fq.gz'
1302-
out_rev = bam.baseName+'.stripped.rev.fq.gz'
1302+
out_fwd = bam.baseName+'.stripped.fwd.fq'
1303+
out_rev = bam.baseName+'.stripped.rev.fq'
13031304
"""
13041305
samtools index $bam
13051306
extract_map_reads.py $bam ${fq[0]} -rev ${fq[1]} -m ${params.strip_mode} -of $out_fwd -or $out_rev -p ${task.cpus}
1307+
pigz -p ${task.cpus} $out_fwd
1308+
pigz -p ${task.cpus} $out_rev
13061309
"""
13071310
}
13081311

0 commit comments

Comments
 (0)