@@ -75,6 +75,10 @@ params.skip_damage_calculation = false
7575params. skip_qualimap = false
7676params. skip_deduplication = false
7777
78+ // Read complexity filtering parameters (optional poly-G trimming performed by the fastp process)
79+ params. complexity_filter = false // when true the fastp process runs (its `when:` guard reads this) and adapter_removal takes the filtered reads
80+ params. complexity_filter_poly_g_min = 10 // value passed to fastp's poly-G minimum-length option in the fastp process
81+
7882// Read clipping and merging parameters
7983params. clip_forward_adaptor = " AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC"
8084params. clip_reverse_adaptor = " AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTA"
@@ -135,23 +139,23 @@ if(params.readPaths){
135139 .map { row -> [ row[0 ], [file(row[1 ][0 ])]] }
136140 .ifEmpty { exit 1 , " params.readPaths was empty - no input files supplied" }
137141 .dump(tag :' input' )
138- .into { read_files_fastqc; read_files_trimming }
142+ .into { ch_read_files_fastqc; ch_read_files_trimming; ch_read_files_complexity_filtering }
139143
140144 } else {
141145 Channel
142146 .from(params. readPaths)
143147 .map { row -> [ row[0 ], [file(row[1 ][0 ]), file(row[1 ][1 ])]] }
144148 .ifEmpty { exit 1 , " params.readPaths was empty - no input files supplied" }
145149 .dump(tag :' input' )
146- .into { ch_read_files_clip; ch_read_files_fastqc }
150+ .into { ch_read_files_clip; ch_read_files_fastqc; ch_read_files_complexity_filtering }
147151
148152 }
149153} else {
150154 Channel
151155 .fromFilePairs( params. reads, size : params. singleEnd ? 1 : 2 )
152156 .ifEmpty { exit 1 , " Cannot find any reads matching: ${ params.reads} \n NB: Path needs to be enclosed in quotes!\n NB: Path requires at least one * wildcard!\n If this is single-end data, please specify --singleEnd on the command line." }
153157 .dump(tag :' input' )
154- .into { ch_read_files_clip; ch_read_files_fastqc }
158+ .into { ch_read_files_clip; ch_read_files_fastqc; ch_read_files_complexity_filtering }
155159
156160}
157161
@@ -222,17 +226,20 @@ process get_software_versions {
222226 """
223227 echo $workflow . manifest . version &> v_pipeline.txt
224228 echo $workflow . nextflow . version &> v_nextflow.txt
225- fastqc --version &> v_fastqc.txt
226- multiqc --version &> v_multiqc.txt
227- echo \$ (bwa 2>&1) &> v_bwa.txt
228- samtools --version &> v_samtools.txt
229- echo \$ (AdapterRemoval -version 2>&1) &> v_adapterremoval.txt
230- picard MarkDuplicates --version &> v_markduplicates.txt || true
231- echo \$ (dedup -v 2>&1) &> v_dedup.txt
232- preseq &> v_preseq.txt
233- gatk BaseRecalibrator --version &> v_gatk.txt
234- qualimap --version &> v_qualimap.txt
235- vcf2genome &> v_vcf2genome.txt
229+ fastqc --version &> v_fastqc.txt 2>&1 || true
230+ multiqc --version &> v_multiqc.txt 2>&1 || true
231+ bwa &> v_bwa.txt 2>&1 || true
232+ samtools --version &> v_samtools.txt 2>&1 || true
233+ AdapterRemoval -version &> v_adapterremoval.txt 2>&1 || true
234+ picard MarkDuplicates --version &> v_markduplicates.txt 2>&1 || true
235+ dedup -v &> v_dedup.txt 2>&1 || true
236+ preseq &> v_preseq.txt 2>&1 || true
237+ gatk BaseRecalibrator --version 2>&1 | grep Version: > v_gatk.txt 2>&1 || true
238+ vcf2genome &> v_vcf2genome.txt 2>&1 || true
239+ fastp --version &> v_fastp.txt 2>&1 || true
240+ bam --version &> v_bamutil.txt 2>&1 || true
241+ qualimap --version &> v_qualimap.txt 2>&1 || true
242+
236243 scrape_software_versions.py &> software_versions_mqc.yaml
237244 """
238245}
@@ -322,6 +329,38 @@ process fastqc {
322329 """
323330}
324331
332+
333+ /* STEP 2.0 - FastP
334+  * Optional poly-G complexity filtering step before read merging/adapter clipping etc; only runs when params.complexity_filter is set.
335+  * Note: Clipping, Merging, Quality Trimming are turned off here (fastp -A / -Q / -L) - we leave this to adapter removal itself!
336+  * In: (name, reads) from ch_read_files_complexity_filtering. Out: (name, *pG.fq.gz) for adapter_removal and the fastp JSON report for MultiQC.
337+  */
338+ process fastp {
339+     tag "$name"
340+     publishDir "${params.outdir}/01-FastP", mode: 'copy'
341+
342+     when: params.complexity_filter
343+
344+     input:
345+     set val(name), file(reads) from ch_read_files_complexity_filtering
346+
347+     output:
348+     set val(name), file("*pG.fq.gz") into (ch_clipped_reads_complexity_filtered, ch_debug_complexity_filtering)
349+     file("*.json") into ch_fastp_for_multiqc
350+
351+     script:
352+     if (params.singleEnd) { // plain Groovy condition: "\${...}" interpolation is only valid inside a string
353+         """
354+         fastp --in1 ${reads[0]} --out1 "${reads[0].baseName}.pG.fq.gz" -A -g --poly_g_min_len "${params.complexity_filter_poly_g_min}" -Q -L -w ${task.cpus} --json "${reads[0].baseName}"_fastp.json
355+         """
356+     } else { // paired-end: filter both mates together; the JSON report is named after the first mate, as in the single-end branch
357+         """
358+         fastp --in1 ${reads[0]} --in2 ${reads[1]} --out1 "${reads[0].baseName}.pG.fq.gz" --out2 "${reads[1].baseName}.pG.fq.gz" -A -g --poly_g_min_len "${params.complexity_filter_poly_g_min}" -Q -L -w ${task.cpus} --json "${reads[0].baseName}"_fastp.json
359+         """
360+     }
361+ }
362+
363+
325364/*
326365 * STEP 2 - Adapter Clipping / Read Merging
327366 */
@@ -332,7 +371,7 @@ process adapter_removal {
332371 publishDir " ${ params.outdir} /02-Merging" , mode: ' copy'
333372
334373 input:
335- set val(name), file(reads) from ch_read_files_clip
374+ set val(name), file(reads) from ( params . complexity_filter ? ch_clipped_reads_complexity_filtered : ch_read_files_clip )
336375
337376 output:
338377 file " *.combined*.gz" into ch_clipped_reads
@@ -594,6 +633,8 @@ process multiqc {
594633 file (' qualimap/*' ) from ch_qualimap_results. collect(). ifEmpty([])
595634 file (' markdup/*' ) from ch_markdup_results_for_multiqc. collect(). ifEmpty([])
596635 file (' dedup/*' ) from ch_dedup_results_for_multiqc. collect(). ifEmpty([])
636+ file (' fastp/*' ) from ch_fastp_for_multiqc. collect(). ifEmpty([])
637+
597638 file workflow_summary from create_workflow_summary(summary)
598639
599640 output:
0 commit comments