Skip to content

Commit 752bede

Browse files
authored
Merge pull request #32 from nf-core/dev
Dev
2 parents 0fab6be + bd400b3 commit 752bede

4 files changed

Lines changed: 76 additions & 17 deletions

File tree

bin/scrape_software_versions.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
'BWA': ['v_bwa.txt', r"Version: (\S+)"],
1515
'Qualimap': ['v_qualimap.txt', r"QualiMap v.(\S+)"],
1616
'GATK': ['v_gatk.txt', r"Version:([\d\.]+)"],
17+
'bamUtil' : ['v_bamutil.txt', r"Version: ([\d\.]+)"],
18+
'fastP': ['v_fastp.txt', r"([\d\.]+)"],
1719
}
1820
results = OrderedDict()
1921
results['nf-core/eager'] = '<span style="color:#999999;\">N/A</span>'
@@ -26,6 +28,8 @@
2628
results['BWA'] = '<span style="color:#999999;\">N/A</span>'
2729
results['Qualimap'] = '<span style="color:#999999;\">N/A</span>'
2830
results['GATK'] = '<span style="color:#999999;\">N/A</span>'
31+
results['bamUtil'] = '<span style="color:#999999;\">N/A</span>'
32+
results['fastP'] = '<span style="color:#999999;\">N/A</span>'
2933

3034
# Search each file using its regex
3135
for k, v in regexes.items():

conf/base.config

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,18 @@ process {
2222
maxErrors = '-1'
2323

2424
// Process-specific resource requirements (others leave at default, e.g. Fastqc)
25+
// Resource overrides applied only to the process named get_software_versions.
withName:get_software_versions {
26+
// Request 2 GB of memory, routed through check_max (defined outside this hunk;
// presumably clamps the request to a configured ceiling — TODO confirm).
memory = { check_max( 2.GB, 'memory' ) }
27+
// Turn off Nextflow's task cache for this process, so it is not restored from a previous run.
cache = false
28+
// Ignoring failures is deliberately left disabled here (kept for reference).
//errorStrategy = 'ignore'
29+
}
30+
// Resource overrides applied only to the process named workflow_summary_mqc.
withName:workflow_summary_mqc {
31+
// Request 2 GB of memory, routed through check_max (defined outside this hunk;
// presumably clamps the request to a configured ceiling — TODO confirm).
memory = { check_max( 2.GB, 'memory' ) }
32+
// Turn off Nextflow's task cache for this process, so it is not restored from a previous run.
cache = false
33+
// Use the 'local' executor for this process regardless of the executor configured elsewhere.
executor = 'local'
34+
// A failure of this process is ignored instead of aborting the pipeline.
errorStrategy = 'ignore'
35+
}
36+
2537
withName:bwa {
2638
memory = { check_max( 16.GB * task.attempt, 'memory' ) }
2739
cpus = { check_max(8 * task.attempt, 'cpus') }

environment.yml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ dependencies:
99
- adapterremoval=2.2.2
1010
- adapterremovalfixprefix=0.0.4
1111
- bwa=0.7.17
12-
- picard=2.18.11
12+
- picard=2.18.14
1313
- samtools=1.9
1414
- dedup=0.12.3
1515
- angsd=0.921
@@ -23,4 +23,6 @@ dependencies:
2323
- r-markdown=0.8
2424
- sequencetools=1.2.2
2525
- preseq=2.0.3
26-
#Missing Schmutzi
26+
- fastp=0.19.4
27+
- bamutil=1.0.14
28+
#Missing Schmutzi,snpAD

main.nf

Lines changed: 56 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,10 @@ params.skip_damage_calculation = false
7575
params.skip_qualimap = false
7676
params.skip_deduplication = false
7777

78+
//Complexity filtering reads
79+
params.complexity_filter = false
80+
params.complexity_filter_poly_g_min = 10
81+
7882
//Read clipping and merging parameters
7983
params.clip_forward_adaptor = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC"
8084
params.clip_reverse_adaptor = "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTA"
@@ -135,23 +139,23 @@ if(params.readPaths){
135139
.map { row -> [ row[0], [file(row[1][0])]] }
136140
.ifEmpty { exit 1, "params.readPaths was empty - no input files supplied" }
137141
.dump(tag:'input')
138-
.into { read_files_fastqc; read_files_trimming }
142+
.into { ch_read_files_fastqc; ch_read_files_trimming; ch_read_files_complexity_filtering }
139143

140144
} else {
141145
Channel
142146
.from(params.readPaths)
143147
.map { row -> [ row[0], [file(row[1][0]), file(row[1][1])]] }
144148
.ifEmpty { exit 1, "params.readPaths was empty - no input files supplied" }
145149
.dump(tag:'input')
146-
.into { ch_read_files_clip; ch_read_files_fastqc }
150+
.into { ch_read_files_clip; ch_read_files_fastqc; ch_read_files_complexity_filtering }
147151

148152
}
149153
} else {
150154
Channel
151155
.fromFilePairs( params.reads, size: params.singleEnd ? 1 : 2 )
152156
.ifEmpty { exit 1, "Cannot find any reads matching: ${params.reads}\nNB: Path needs to be enclosed in quotes!\nNB: Path requires at least one * wildcard!\nIf this is single-end data, please specify --singleEnd on the command line." }
153157
.dump(tag:'input')
154-
.into { ch_read_files_clip; ch_read_files_fastqc }
158+
.into { ch_read_files_clip; ch_read_files_fastqc; ch_read_files_complexity_filtering }
155159

156160
}
157161

@@ -222,17 +226,20 @@ process get_software_versions {
222226
"""
223227
echo $workflow.manifest.version &> v_pipeline.txt
224228
echo $workflow.nextflow.version &> v_nextflow.txt
225-
fastqc --version &> v_fastqc.txt
226-
multiqc --version &> v_multiqc.txt
227-
echo \$(bwa 2>&1) &> v_bwa.txt
228-
samtools --version &> v_samtools.txt
229-
echo \$(AdapterRemoval -version 2>&1) &> v_adapterremoval.txt
230-
picard MarkDuplicates --version &> v_markduplicates.txt || true
231-
echo \$(dedup -v 2>&1) &> v_dedup.txt
232-
preseq &> v_preseq.txt
233-
gatk BaseRecalibrator --version &> v_gatk.txt
234-
qualimap --version &> v_qualimap.txt
235-
vcf2genome &> v_vcf2genome.txt
229+
fastqc --version &> v_fastqc.txt 2>&1 || true
230+
multiqc --version &> v_multiqc.txt 2>&1 || true
231+
bwa &> v_bwa.txt 2>&1 || true
232+
samtools --version &> v_samtools.txt 2>&1 || true
233+
AdapterRemoval -version &> v_adapterremoval.txt 2>&1 || true
234+
picard MarkDuplicates --version &> v_markduplicates.txt 2>&1 || true
235+
dedup -v &> v_dedup.txt 2>&1 || true
236+
preseq &> v_preseq.txt 2>&1 || true
237+
gatk BaseRecalibrator --version 2>&1 | grep Version: > v_gatk.txt 2>&1 || true
238+
vcf2genome &> v_vcf2genome.txt 2>&1 || true
239+
fastp --version &> v_fastp.txt 2>&1 || true
240+
bam --version &> v_bamutil.txt 2>&1 || true
241+
qualimap --version &> v_qualimap.txt 2>&1 || true
242+
236243
scrape_software_versions.py &> software_versions_mqc.yaml
237244
"""
238245
}
@@ -322,6 +329,38 @@ process fastqc {
322329
"""
323330
}
324331

332+
333+
/* STEP 2.0 - FastP
 * Optional poly-G complexity filtering step before read merging/adapter clipping etc.
 * Note: Clipping, Merging, Quality Trimming are turned off here - we leave this to adapter removal itself!
 *
 * Input:  (name, reads) tuples from ch_read_files_complexity_filtering.
 * Output: (name, *pG.fq.gz) tuples for the downstream clipping step, plus the
 *         fastp JSON report for MultiQC.
 */

process fastp {
    tag "$name"
    publishDir "${params.outdir}/01-FastP", mode: 'copy'

    // Only executed when complexity filtering was requested via params.complexity_filter.
    when: params.complexity_filter

    input:
    set val(name), file(reads) from ch_read_files_complexity_filtering

    output:
    set val(name), file("*pG.fq.gz") into (ch_clipped_reads_complexity_filtered, ch_debug_complexity_filtering)
    file("*.json") into ch_fastp_for_multiqc

    script:
    // fastp switches used below:
    //   -A  disable adapter trimming      -g  enable poly-G tail trimming
    //   -Q  disable quality filtering     -L  disable length filtering
    //   -w  worker threads                --poly_g_min_len  minimum poly-G length to trim
    // Long options need a double dash; the JSON report is named after the first read file.
    if (params.singleEnd) {
        """
        fastp --in1 ${reads[0]} --out1 "${reads[0].baseName}.pG.fq.gz" -A -g --poly_g_min_len "${params.complexity_filter_poly_g_min}" -Q -L -w ${task.cpus} --json "${reads[0].baseName}"_fastp.json
        """
    } else {
        """
        fastp --in1 ${reads[0]} --in2 ${reads[1]} --out1 "${reads[0].baseName}.pG.fq.gz" --out2 "${reads[1].baseName}.pG.fq.gz" -A -g --poly_g_min_len "${params.complexity_filter_poly_g_min}" -Q -L -w ${task.cpus} --json "${reads[0].baseName}"_fastp.json
        """
    }
}
362+
363+
325364
/*
326365
* STEP 2 - Adapter Clipping / Read Merging
327366
*/
@@ -332,7 +371,7 @@ process adapter_removal {
332371
publishDir "${params.outdir}/02-Merging", mode: 'copy'
333372

334373
input:
335-
set val(name), file(reads) from ch_read_files_clip
374+
set val(name), file(reads) from ( params.complexity_filter ? ch_clipped_reads_complexity_filtered : ch_read_files_clip )
336375

337376
output:
338377
file "*.combined*.gz" into ch_clipped_reads
@@ -594,6 +633,8 @@ process multiqc {
594633
file ('qualimap/*') from ch_qualimap_results.collect().ifEmpty([])
595634
file ('markdup/*') from ch_markdup_results_for_multiqc.collect().ifEmpty([])
596635
file ('dedup/*') from ch_dedup_results_for_multiqc.collect().ifEmpty([])
636+
file ('fastp/*') from ch_fastp_for_multiqc.collect().ifEmpty([])
637+
597638
file workflow_summary from create_workflow_summary(summary)
598639

599640
output:

0 commit comments

Comments
 (0)