@@ -44,6 +44,7 @@ def helpMessage() {
4444 --saveReference Saves reference genome indices for later reusage
4545
4646 Skipping Skip any of the mentioned steps
47+ --skip_adapterremoval
4748 --skip_preseq
4849 --skip_damage_calculation
4950 --skip_qualimap
@@ -59,6 +60,8 @@ def helpMessage() {
5960 --clip_readlength Specify read minimum length to be kept for downstream analysis
6061 --clip_min_read_quality Specify minimum base quality for not trimming off bases
6162 --min_adap_overlap Specify minimum adapter overlap
63+ --skip_collapse Skip merging forward and reverse reads together. (Only for PE samples)
64+ --skip_trim Skip adaptor and quality trimming
6265
6366 BWA Mapping
6467 --bwaalnn Specify the -n parameter for BWA aln.
@@ -145,6 +148,7 @@ params.email = false
145148params. plaintext_email = false
146149
147150// Skipping parts of the pipeline for impatient users
151+ params. skip_adapterremoval = false
148152params. skip_preseq = false
149153params. skip_damage_calculation = false
150154params. skip_qualimap = false
@@ -160,6 +164,8 @@ params.clip_reverse_adaptor = "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTA"
160164params. clip_readlength = 30
161165params. clip_min_read_quality = 20
162166params. min_adap_overlap = 1
167+ params. skip_collapse = false
168+ params. skip_trim = false
163169
164170// Read mapping parameters (default = BWA aln default)
165171params. bwaalnn = 0.04
@@ -258,6 +264,10 @@ if( params.singleEnd || params.pairedEnd || params.bam){
258264 exit 1 , " Please specify either --singleEnd, --pairedEnd to execute the pipeline on FastQ files and --bam for previously processed BAM files!"
259265}
260266
267+ // Validate that skip_collapse is only set to True for pairedEnd reads!
268+ if (params. skip_collapse && params. singleEnd){
269+ exit 1 , " --skip_collapse can only be set for pairedEnd samples!"
270+ }
261271
262272// AWSBatch sanity checking
263273if (workflow. profile == ' awsbatch' ){
@@ -343,6 +353,8 @@ summary['Fasta Ref'] = params.fasta
343353summary[' BAM Index Type' ] = (params. large_ref == " " ) ? ' BAI' : ' CSI'
344354if (params. bwa_index) summary[' BWA Index' ] = params. bwa_index
345355summary[' Data Type' ] = params. singleEnd ? ' Single-End' : ' Paired-End'
356+ summary[' Skip Collapsing' ] = params. skip_collapse ? ' Yes' : ' No'
357+ summary[' Skip Trimming' ] = params. skip_trim ? ' Yes' : ' No'
346358summary[' Max Memory' ] = params. max_memory
347359summary[' Max CPUs' ] = params. max_cpus
348360summary[' Max Time' ] = params. max_time
@@ -362,7 +374,7 @@ if(workflow.profile == 'awsbatch'){
362374 summary[' AWS Queue' ] = params. awsqueue
363375}
364376if (params. email) summary[' E-mail Address' ] = params. email
365- log. info summary. collect { k,v -> " ${ k.padRight(15 )} : $v " }. join(" \n " )
377+ log. info summary. collect { k,v -> " ${ k.padRight(35 )} : $v " }. join(" \n " )
366378log. info " ========================================="
367379
368380
@@ -501,8 +513,7 @@ process convertBam {
501513 file bam from ch_bam_to_fastq_convert
502514
503515 output:
504- set val(" ${ base} " ), file(" *.fastq.gz" ) into (ch_read_files_converted_fastqc, ch_read_files_converted_fastp)
505- file(" *.fastq.gz" ) into (ch_read_files_converted_mapping_bwa, ch_read_files_converted_mapping_cm, ch_read_files_converted_mapping_bwamem)
516+ set val(" ${ base} " ), file(" *.fastq.gz" ) into (ch_read_files_converted_fastqc, ch_read_files_converted_fastp, ch_read_files_converted_mapping_bwa, ch_read_files_converted_mapping_cm, ch_read_files_converted_mapping_bwamem)
506517
507518 script:
508519 base = " ${ bam.baseName} "
@@ -568,60 +579,87 @@ process fastp {
568579/*
569580 * STEP 2 - Adapter Clipping / Read Merging
570581 */
571-
572-
582+ // Initialize empty channel if we skip adapterremoval entirely
// Adapter clipping / read merging is optional. With --skip_adapterremoval the
// reads are forwarded untouched; otherwise the adapter_removal process below
// trims and/or collapses them with AdapterRemoval.
if (params.skip_adapterremoval) {
    // No AdapterRemoval run, hence no *.settings logs to hand downstream
    ch_adapterremoval_logs = Channel.empty()
    // Forward exactly the channel adapter_removal would have consumed (same
    // selection as its input declaration). Mixing both candidate channels would
    // forward the items of both of them.
    // NOTE(review): assumes ch_read_files_clip still carries the raw reads when
    // complexity filtering is enabled - confirm against the channel setup.
    ch_reads_without_clipping = params.complexity_filter_poly_g ? ch_clipped_reads_complexity_filtered_poly_g : ch_read_files_clip
    ch_reads_without_clipping.into { ch_clipped_reads; ch_clipped_reads_for_fastqc; ch_clipped_reads_circularmapper; ch_clipped_reads_bwamem }
} else {
/*
 * adapter_removal: runs AdapterRemoval on one sample.
 *
 * Input : (name, reads) - one FastQ for single-end, two for paired-end data.
 * Output: (base, output/*.gz) - a single combined FastQ when reads are collapsed
 *         or single-end, the two pair*.truncated.gz files when paired-end reads
 *         are kept uncollapsed; plus the *.settings log.
 */
process adapter_removal {
    tag "$name"
    publishDir "${params.outdir}/read_merging", mode: 'copy'

    when: !params.bam && !params.skip_adapterremoval

    input:
    set val(name), file(reads) from ( params.complexity_filter_poly_g ? ch_clipped_reads_complexity_filtered_poly_g : ch_read_files_clip )

    output:
    set val(base), file("output/*.gz") into (ch_clipped_reads,ch_clipped_reads_for_fastqc,ch_clipped_reads_circularmapper,ch_clipped_reads_bwamem)
    file("*.settings") into ch_adapterremoval_logs

    script:
    base = reads[0].baseName
    // Trimming options are dropped entirely when --skip_trim is set
    trim_me = params.skip_trim ? '' : "--trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap}"
    collapse_me = params.skip_collapse ? '' : '--collapse'

    if (!params.singleEnd && !params.skip_collapse) {
        // PE, collapse (with or without trimming): downstream mappers treat the
        // result as single-end data, so everything is combined into ONE file.
        // NOTE(review): relies on AdapterRemoval writing all five outputs in
        // --collapse mode even when no trimming options are given - confirm.
        """
        mkdir -p output
        AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} ${trim_me} --gzip --threads ${task.cpus} ${collapse_me}
        #Combine files
        zcat *.collapsed.gz *.collapsed.truncated.gz *.singleton.truncated.gz *.pair1.truncated.gz *.pair2.truncated.gz | gzip > output/${base}.combined.fq.gz
        """
    } else if (!params.singleEnd) {
        // PE, no collapse (with or without trimming): keep both mates as
        // separate files so the mappers can run in paired mode. This also covers
        // --skip_collapse together with --skip_trim, which must not fall through
        // to the single-end branch (that would drop the reverse reads).
        """
        mkdir -p output
        AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} --gzip --threads ${task.cpus} ${trim_me} ${collapse_me}
        mv ${base}.pair*.truncated.gz output/
        """
    } else {
        // SE, collapse not possible, trim reads (unless --skip_trim)
        """
        mkdir -p output
        AdapterRemoval --file1 ${reads[0]} --basename ${base} --gzip --threads ${task.cpus} ${trim_me}

        mv *.truncated.gz output/
        """
    }
}
}
642+
643+
607644
608645/*
609- * STEP 2.1 - FastQC after clipping/merging (if applied!)
610- */
646+ * STEP 2.1 - FastQC after clipping/merging (if applied!)
647+ */
611648process fastqc_after_clipping {
612- tag " ${ reads[0].baseName } "
649+ tag " ${ prefix } "
613650 publishDir " ${ params.outdir} /FastQC/after_clipping" , mode: ' copy' ,
614651 saveAs: {filename -> filename. indexOf(" .zip" ) > 0 ? " zips/$filename " : " $filename " }
615652
616- when: ! params. bam
653+ when: ! params. bam && ! params . skip_adapterremoval
617654
618655 input:
619- file(reads) from ch_clipped_reads_for_fastqc
656+ set val(name), file(reads) from ch_clipped_reads_for_fastqc
620657
621658 output:
622659 file " *_fastqc.{zip,html}" optional true into ch_fastqc_after_clipping
623660
624661 script:
662+ prefix = reads[0 ]. toString(). tokenize(' .' )[0 ]
625663 """
626664 fastqc -q $reads
627665 """
@@ -638,7 +676,8 @@ process bwa {
638676 when: ! params. circularmapper && ! params. bwamem
639677
640678 input:
641- file(reads) from ch_clipped_reads. mix(ch_read_files_converted_mapping_bwa)
679+ set val(name), file(reads) from ch_clipped_reads. mix(ch_read_files_converted_mapping_bwa)
680+
642681 file index from ch_bwa_index. first()
643682
644683
@@ -648,14 +687,28 @@ process bwa {
648687
649688
650689 script:
651- prefix = reads[0 ]. toString() - ~/ (_R1)? (\. combined\. )? (prefixed)? (_trimmed)? (_val_1)? (\. fq)? (\. fastq)? (\. gz)? $/
652- fasta = " ${ index} /*.fasta"
690+ fasta = " ${ index} /*.fasta"
653691 size = " ${ params.large_ref} " ? ' -c' : ' '
692+
693+ // PE data without merging, PE data without any AR applied
694+ if (! params. singleEnd && (params. skip_collapse || params. skip_adapterremoval)){
695+ prefix = reads[0 ]. toString(). tokenize(' .' )[0 ]
654696 """
655- bwa aln -t ${ task.cpus} $fasta $reads -n ${ params.bwaalnn} -l ${ params.bwaalnl} -k ${ params.bwaalnk} -f "${ reads.baseName} .sai"
656- bwa samse -r "@RG\\ tID:ILLUMINA-${ prefix} \\ tSM:${ prefix} \\ tPL:illumina" $fasta "${ reads.baseName} ".sai $reads | samtools sort -@ ${ task.cpus} -O bam - > "${ prefix} ".sorted.bam
697+ bwa aln -t ${ task.cpus} $fasta ${ reads[0]} -n ${ params.bwaalnn} -l ${ params.bwaalnl} -k ${ params.bwaalnk} -f ${ prefix} .r1.sai
698+ bwa aln -t ${ task.cpus} $fasta ${ reads[1]} -n ${ params.bwaalnn} -l ${ params.bwaalnl} -k ${ params.bwaalnk} -f ${ prefix} .r2.sai
699+ bwa sampe -r "@RG\\ tID:ILLUMINA-${ prefix} \\ tSM:${ prefix} \\ tPL:illumina" $fasta ${ prefix} .r1.sai ${ prefix} .r2.sai ${ reads[0]} ${ reads[1]} | samtools sort -@ ${ task.cpus} -O bam - > ${ prefix} .sorted.bam
657700 samtools index "${ size} " "${ prefix} ".sorted.bam
658701 """
702+ } else {
703+ // PE collapsed, or SE data
704+ prefix = reads[0 ]. toString(). tokenize(' .' )[0 ]
705+ """
706+ bwa aln -t ${ task.cpus} $fasta $reads -n ${ params.bwaalnn} -l ${ params.bwaalnl} -k ${ params.bwaalnk} -f ${ prefix} .sai
707+ bwa samse -r "@RG\\ tID:ILLUMINA-${ prefix} \\ tSM:${ prefix} \\ tPL:illumina" $fasta ${ prefix} .sai $reads | samtools sort -@ ${ task.cpus} -O bam - > "${ prefix} ".sorted.bam
708+ samtools index "${ size} " "${ prefix} ".sorted.bam
709+ """
710+ }
711+
659712}
660713
661714process circulargenerator{
@@ -694,7 +747,7 @@ process circularmapper{
694747 when: params. circularmapper
695748
696749 input:
697- file reads from ch_clipped_reads_circularmapper. mix(ch_read_files_converted_mapping_cm)
750+ set val(name), file( reads) from ch_clipped_reads_circularmapper. mix(ch_read_files_converted_mapping_cm)
698751 file index from ch_circularmapper_indices. first()
699752
700753 output:
@@ -703,17 +756,31 @@ process circularmapper{
703756
704757 script:
705758 filter = " ${ params.circularfilter} " ? ' ' : ' -f true -x false'
706- prefix = reads[ 0 ] . toString() - ~ / (_R1) ? (\ . combined\ . ) ? (prefixed) ? (_trimmed) ? (_val_1) ? (\ . fq) ? (\ . fastq) ? (\ . gz) ? $ /
759+
707760 fasta = " ${ index} /*_*.fasta"
708761 size = " ${ params.large_ref} " ? ' -c' : ' '
709762
763+ if (! params. singleEnd && params. skip_collapse ){
764+ prefix = reads[0 ]. toString(). tokenize(' .' )[0 ]
710765 """
711- bwa aln -t ${ task.cpus} $fasta $reads -n ${ params.bwaalnn} -l ${ params.bwaalnl} -k ${ params.bwaalnk} -f "${ reads.baseName} .sai"
712- bwa samse -r "@RG\\ tID:ILLUMINA-${ prefix} \\ tSM:${ prefix} \\ tPL:illumina" $fasta "${ reads.baseName} ".sai $reads > tmp.out
766+ bwa aln -t ${ task.cpus} $fasta ${ reads[0]} -n ${ params.bwaalnn} -l ${ params.bwaalnl} -k ${ params.bwaalnk} -f ${ prefix} .r1.sai
767+ bwa aln -t ${ task.cpus} $fasta ${ reads[1]} -n ${ params.bwaalnn} -l ${ params.bwaalnl} -k ${ params.bwaalnk} -f ${ prefix} .r2.sai
768+ bwa sampe -r "@RG\\ tID:ILLUMINA-${ prefix} \\ tSM:${ prefix} \\ tPL:illumina" $fasta ${ prefix} .r1.sai ${ prefix} .r2.sai ${ reads[0]} ${ reads[1]} > tmp.out
769+ realignsamfile -e ${ params.circularextension} -i tmp.out -r $fasta $filter
770+ samtools sort -@ ${ task.cpus} -O bam tmp_realigned.bam > ${ prefix} .sorted.bam
771+ samtools index "${ size} " ${ prefix} .sorted.bam
772+ """
773+ } else {
774+ prefix = reads[0 ]. toString(). tokenize(' .' )[0 ]
775+ """
776+ bwa aln -t ${ task.cpus} $fasta $reads -n ${ params.bwaalnn} -l ${ params.bwaalnl} -k ${ params.bwaalnk} -f ${ prefix} .sai
777+ bwa samse -r "@RG\\ tID:ILLUMINA-${ prefix} \\ tSM:${ prefix} \\ tPL:illumina" $fasta ${ prefix} .sai $reads > tmp.out
713778 realignsamfile -e ${ params.circularextension} -i tmp.out -r $fasta $filter
714779 samtools sort -@ ${ task.cpus} -O bam tmp_realigned.bam > "${ prefix} ".sorted.bam
715780 samtools index "${ size} " "${ prefix} ".sorted.bam
716781 """
782+ }
783+
717784}
718785
719786process bwamem {
@@ -723,7 +790,7 @@ process bwamem {
723790 when: params. bwamem && ! params. circularmapper
724791
725792 input:
726- file(reads) from ch_clipped_reads_bwamem. mix(ch_read_files_converted_mapping_bwamem)
793+ set val(name), file(reads) from ch_clipped_reads_bwamem. mix(ch_read_files_converted_mapping_bwamem)
727794 file index from ch_bwa_index_bwamem. first()
728795
729796 output:
@@ -735,10 +802,19 @@ process bwamem {
735802 prefix = reads[0 ]. toString() - ~/ (_R1)? (\. combined\. )? (prefixed)? (_trimmed)? (_val_1)? (\. fq)? (\. fastq)? (\. gz)? $/
736803 fasta = " ${ index} /*.fasta"
737804 size = " ${ params.large_ref} " ? ' -c' : ' '
805+
806+ if (! params. singleEnd && params. skip_collapse){
807+ """
808+ bwa mem -t ${ task.cpus} $fasta ${ reads[0]} ${ reads[1]} -R "@RG\\ tID:ILLUMINA-${ prefix} \\ tSM:${ prefix} \\ tPL:illumina" | samtools sort -@ ${ task.cpus} -O bam - > "${ prefix} ".sorted.bam
809+ samtools index "${ size} " -@ ${ task.cpus} "${ prefix} ".sorted.bam
810+ """
811+ } else {
738812 """
739813 bwa mem -t ${ task.cpus} $fasta $reads -R "@RG\\ tID:ILLUMINA-${ prefix} \\ tSM:${ prefix} \\ tPL:illumina" | samtools sort -@ ${ task.cpus} -O bam - > "${ prefix} ".sorted.bam
740- samtools index "${ size} " -@ ${ task.cpus} "${ prefix} ".sorted.bam
814+ samtools index "${ size} " -@ ${ task.cpus} "${ prefix} ".sorted.bam
741815 """
816+ }
817+
742818}
743819
744820/*
0 commit comments