Skip to content

Commit 85b0406

Browse files
authored
Merge pull request #159 from nf-core/ar_optional
Flexible AdapterRemoval
2 parents 5a255fd + 5a637c9 commit 85b0406

5 files changed

Lines changed: 148 additions & 36 deletions

File tree

.travis.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,12 @@ script:
4040
- nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --saveReference
4141
# Run the basic pipeline with single end data (pretending it's single end actually)
4242
- nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --singleEnd --bwa_index results/reference_genome/bwa_index/bwa_index/
43+
# Run the basic pipeline with paired end data without collapsing
44+
- nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --skip_collapse --saveReference
45+
# Run the basic pipeline with paired end data without trimming
46+
- nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --skip_trim --saveReference
47+
# Run the basic pipeline with paired end data without adapterRemoval
48+
- nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --skip_adapterremoval --saveReference
4349
# Run the same pipeline testing optional step: fastp, complexity
4450
- nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --complexity_filter --bwa_index results/reference_genome/bwa_index/bwa_index/
4551
# Test BAM Trimming

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
1010

1111
* [#152](https://github.com/nf-core/eager/pull/152) - Clarified `--complexity_filter` flag to be specifically for poly G trimming.
1212
* [#155](https://github.com/nf-core/eager/pull/155) - Added [Dedup log to output folders](https://github.com/nf-core/eager/issues/154)
13+
* [#159](https://github.com/nf-core/eager/pull/159) - Added possibility to skip AdapterRemoval, skip merging, skip trimming, fixing [#64](https://github.com/nf-core/eager/issues/64), [#137](https://github.com/nf-core/eager/issues/137) - thanks to @maxibor, @jfy133
1314

1415
### `Fixed`
1516

docs/usage.md

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -283,12 +283,16 @@ This part of the documentation contains a list of user-adjustable parameters in
283283

284284
## Step skipping parameters
285285

286-
Some of the steps in the pipeline can be executed optionally. If you specify specific steps to be skipped, there won't be any output related to these modules.
286+
Some of the steps in the pipeline can be executed optionally. If you specify specific steps to be skipped, there won't be any output related to these modules.
287287

288288
### `--skip_preseq`
289289

290290
Turns off the computation of library complexity estimation.
291291

292+
### `--skip_adapterremoval`
293+
294+
Turns off adaptor trimming and paired-end read merging. Equivalent to setting both `--skip_collapse` and `--skip_trim`.
295+
292296
### `--skip_damage_calculation`
293297

294298
Turns off the DamageProfiler module to compute DNA damage profiles.
@@ -333,6 +337,24 @@ Defines the minimum read quality per base that is required for a base to be kept
333337
### `--min_adap_overlap` 1
334338
Sets the minimum overlap between two reads when read merging is performed. Default is set to `1` base overlap.
335339

340+
### `--skip_collapse`
341+
342+
Turns off the paired-end read merging.
343+
344+
For example:
345+
```bash
346+
--pairedEnd --skip_collapse --reads '*.fastq'
347+
```
348+
349+
### `--skip_trim`
350+
351+
Turns off adaptor and quality trimming.
352+
353+
For example:
354+
```bash
355+
--pairedEnd --skip_trim --reads '*.fastq'
356+
```
357+
336358
## Read Mapping Parameters
337359

338360
## BWA (default)

main.nf

Lines changed: 111 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ def helpMessage() {
4444
--saveReference Saves reference genome indices for later reusage
4545
4646
Skipping Skip any of the mentioned steps
47+
--skip_adapterremoval
4748
--skip_preseq
4849
--skip_damage_calculation
4950
--skip_qualimap
@@ -59,6 +60,8 @@ def helpMessage() {
5960
--clip_readlength Specify read minimum length to be kept for downstream analysis
6061
--clip_min_read_quality Specify minimum base quality for not trimming off bases
6162
--min_adap_overlap Specify minimum adapter overlap
63+
--skip_collapse Skip merging forward and reverse reads together. (Only for PE samples)
64+
--skip_trim Skip adaptor and quality trimming
6265
6366
BWA Mapping
6467
--bwaalnn Specify the -n parameter for BWA aln.
@@ -145,6 +148,7 @@ params.email = false
145148
params.plaintext_email = false
146149

147150
// Skipping parts of the pipeline for impatient users
151+
params.skip_adapterremoval = false
148152
params.skip_preseq = false
149153
params.skip_damage_calculation = false
150154
params.skip_qualimap = false
@@ -160,6 +164,8 @@ params.clip_reverse_adaptor = "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTA"
160164
params.clip_readlength = 30
161165
params.clip_min_read_quality = 20
162166
params.min_adap_overlap = 1
167+
params.skip_collapse = false
168+
params.skip_trim = false
163169

164170
//Read mapping parameters (default = BWA aln default)
165171
params.bwaalnn = 0.04
@@ -258,6 +264,10 @@ if( params.singleEnd || params.pairedEnd || params.bam){
258264
exit 1, "Please specify either --singleEnd, --pairedEnd to execute the pipeline on FastQ files and --bam for previously processed BAM files!"
259265
}
260266

267+
//Validate that skip_collapse is only set to True for pairedEnd reads!
268+
if (params.skip_collapse && params.singleEnd){
269+
exit 1, "--skip_collapse can only be set for pairedEnd samples!"
270+
}
261271

262272
//AWSBatch sanity checking
263273
if(workflow.profile == 'awsbatch'){
@@ -343,6 +353,8 @@ summary['Fasta Ref'] = params.fasta
343353
summary['BAM Index Type'] = (params.large_ref == "") ? 'BAI' : 'CSI'
344354
if(params.bwa_index) summary['BWA Index'] = params.bwa_index
345355
summary['Data Type'] = params.singleEnd ? 'Single-End' : 'Paired-End'
356+
summary['Skip Collapsing'] = params.skip_collapse ? 'Yes' : 'No'
357+
summary['Skip Trimming'] = params.skip_trim ? 'Yes' : 'No'
346358
summary['Max Memory'] = params.max_memory
347359
summary['Max CPUs'] = params.max_cpus
348360
summary['Max Time'] = params.max_time
@@ -362,7 +374,7 @@ if(workflow.profile == 'awsbatch'){
362374
summary['AWS Queue'] = params.awsqueue
363375
}
364376
if(params.email) summary['E-mail Address'] = params.email
365-
log.info summary.collect { k,v -> "${k.padRight(15)}: $v" }.join("\n")
377+
log.info summary.collect { k,v -> "${k.padRight(35)}: $v" }.join("\n")
366378
log.info "========================================="
367379

368380

@@ -501,8 +513,7 @@ process convertBam {
501513
file bam from ch_bam_to_fastq_convert
502514

503515
output:
504-
set val("${base}"), file("*.fastq.gz") into (ch_read_files_converted_fastqc, ch_read_files_converted_fastp)
505-
file("*.fastq.gz") into (ch_read_files_converted_mapping_bwa, ch_read_files_converted_mapping_cm, ch_read_files_converted_mapping_bwamem)
516+
set val("${base}"), file("*.fastq.gz") into (ch_read_files_converted_fastqc, ch_read_files_converted_fastp, ch_read_files_converted_mapping_bwa, ch_read_files_converted_mapping_cm, ch_read_files_converted_mapping_bwamem)
506517

507518
script:
508519
base = "${bam.baseName}"
@@ -568,60 +579,87 @@ process fastp {
568579
/*
569580
* STEP 2 - Adapter Clipping / Read Merging
570581
*/
571-
572-
582+
//Initialize empty channel if we skip adapterremoval entirely
583+
if(params.skip_adapterremoval) {
584+
//No logs if no AR is run
585+
ch_adapterremoval_logs = Channel.empty()
586+
//Either coming from complexity filtering, or directly use reads normally directed to clipping first and push them through to the other channels downstream!
587+
ch_clipped_reads_complexity_filtered_poly_g.mix(ch_read_files_clip).into { ch_clipped_reads;ch_clipped_reads_for_fastqc;ch_clipped_reads_circularmapper;ch_clipped_reads_bwamem }
588+
} else {
573589
process adapter_removal {
574590
tag "$name"
575591
publishDir "${params.outdir}/read_merging", mode: 'copy'
576592

577-
when: !params.bam
593+
when: !params.bam && !params.skip_adapterremoval
578594

579595
input:
580596
set val(name), file(reads) from ( params.complexity_filter_poly_g ? ch_clipped_reads_complexity_filtered_poly_g : ch_read_files_clip )
581597

582598
output:
583-
file "*.combined*.gz" into (ch_clipped_reads, ch_clipped_reads_for_fastqc,ch_clipped_reads_circularmapper,ch_clipped_reads_bwamem)
584-
file "*.settings" into ch_adapterremoval_logs
599+
set val(base), file("output/*.gz") into (ch_clipped_reads,ch_clipped_reads_for_fastqc,ch_clipped_reads_circularmapper,ch_clipped_reads_bwamem)
600+
file("*.settings") into ch_adapterremoval_logs
585601

586602
script:
587-
prefix = reads[0].toString() - ~/(_R1)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/
588-
//Readprefixing only required for PE data with merging
589-
fixprefix = (params.singleEnd) ? "" : "AdapterRemovalFixPrefix ${prefix}.combined.fq.gz ${prefix}.combined.prefixed.fq.gz"
603+
base = reads[0].baseName
604+
//This checks whether we skip trimming and defines a variable respectively
605+
trim_me = params.skip_trim ? '' : "--trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap}"
606+
collapse_me = params.skip_collapse ? '' : '--collapse'
590607

591-
if( !params.singleEnd ){
608+
//PE mode, dependent on trim_me and collapse_me the respective procedure is run or not :-)
609+
if (!params.singleEnd && !params.skip_collapse && !params.skip_trim){
592610
"""
593-
AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${prefix} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} --collapse
611+
mkdir -p output
612+
AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} ${trim_me} --gzip --threads ${task.cpus} ${collapse_me}
594613
#Combine files
595-
zcat *.collapsed.gz *.collapsed.truncated.gz *.singleton.truncated.gz *.pair1.truncated.gz *.pair2.truncated.gz | gzip > ${prefix}.combined.fq.gz
596-
${fixprefix}
597-
rm ${prefix}.combined.fq.gz
614+
zcat *.collapsed.gz *.collapsed.truncated.gz *.singleton.truncated.gz *.pair1.truncated.gz *.pair2.truncated.gz | gzip > output/${base}.combined.fq.gz
615+
"""
616+
//PE, don't collapse, but trim reads
617+
} else if (!params.singleEnd && params.skip_collapse && !params.skip_trim) {
618+
"""
619+
mkdir -p output
620+
AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} --gzip --threads ${task.cpus} ${trim_me} ${collapse_me}
621+
mv ${base}.pair*.truncated.gz output/
622+
"""
623+
//PE, collapse, but don't trim reads
624+
} else if (!params.singleEnd && !params.skip_collapse && params.skip_trim) {
625+
"""
626+
mkdir -p output
627+
AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} --gzip --threads ${task.cpus} ${collapse_me} ${trim_me}
628+
629+
mv ${base}.pair*.truncated.gz output/
598630
"""
599631
} else {
632+
//SE, collapse not possible, trim reads
600633
"""
601-
AdapterRemoval --file1 ${reads[0]} --basename ${prefix} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality}
602-
# Pseudo-Combine
603-
mv *.truncated.gz ${prefix}.combined.fq.gz
634+
mkdir -p output
635+
AdapterRemoval --file1 ${reads[0]} --basename ${base} --gzip --threads ${task.cpus} ${trim_me}
636+
637+
mv *.truncated.gz output/
604638
"""
605639
}
606640
}
641+
}
642+
643+
607644

608645
/*
609-
* STEP 2.1 - FastQC after clipping/merging (if applied!)
610-
*/
646+
* STEP 2.1 - FastQC after clipping/merging (if applied!)
647+
*/
611648
process fastqc_after_clipping {
612-
tag "${reads[0].baseName}"
649+
tag "${prefix}"
613650
publishDir "${params.outdir}/FastQC/after_clipping", mode: 'copy',
614651
saveAs: {filename -> filename.indexOf(".zip") > 0 ? "zips/$filename" : "$filename"}
615652

616-
when: !params.bam
653+
when: !params.bam && !params.skip_adapterremoval
617654

618655
input:
619-
file(reads) from ch_clipped_reads_for_fastqc
656+
set val(name), file(reads) from ch_clipped_reads_for_fastqc
620657

621658
output:
622659
file "*_fastqc.{zip,html}" optional true into ch_fastqc_after_clipping
623660

624661
script:
662+
prefix = reads[0].toString().tokenize('.')[0]
625663
"""
626664
fastqc -q $reads
627665
"""
@@ -638,7 +676,8 @@ process bwa {
638676
when: !params.circularmapper && !params.bwamem
639677

640678
input:
641-
file(reads) from ch_clipped_reads.mix(ch_read_files_converted_mapping_bwa)
679+
set val(name), file(reads) from ch_clipped_reads.mix(ch_read_files_converted_mapping_bwa)
680+
642681
file index from ch_bwa_index.first()
643682

644683

@@ -648,14 +687,28 @@ process bwa {
648687

649688

650689
script:
651-
prefix = reads[0].toString() - ~/(_R1)?(\.combined\.)?(prefixed)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/
652-
fasta = "${index}/*.fasta"
690+
fasta = "${index}/*.fasta"
653691
size = "${params.large_ref}" ? '-c' : ''
692+
693+
//PE data without merging, PE data without any AR applied
694+
if (!params.singleEnd && (params.skip_collapse || params.skip_adapterremoval)){
695+
prefix = reads[0].toString().tokenize('.')[0]
654696
"""
655-
bwa aln -t ${task.cpus} $fasta $reads -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f "${reads.baseName}.sai"
656-
bwa samse -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta "${reads.baseName}".sai $reads | samtools sort -@ ${task.cpus} -O bam - > "${prefix}".sorted.bam
697+
bwa aln -t ${task.cpus} $fasta ${reads[0]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r1.sai
698+
bwa aln -t ${task.cpus} $fasta ${reads[1]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r2.sai
699+
bwa sampe -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta ${prefix}.r1.sai ${prefix}.r2.sai ${reads[0]} ${reads[1]} | samtools sort -@ ${task.cpus} -O bam - > ${prefix}.sorted.bam
657700
samtools index "${size}" "${prefix}".sorted.bam
658701
"""
702+
} else {
703+
//PE collapsed, or SE data
704+
prefix = reads[0].toString().tokenize('.')[0]
705+
"""
706+
bwa aln -t ${task.cpus} $fasta $reads -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.sai
707+
bwa samse -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta ${prefix}.sai $reads | samtools sort -@ ${task.cpus} -O bam - > "${prefix}".sorted.bam
708+
samtools index "${size}" "${prefix}".sorted.bam
709+
"""
710+
}
711+
659712
}
660713

661714
process circulargenerator{
@@ -694,7 +747,7 @@ process circularmapper{
694747
when: params.circularmapper
695748

696749
input:
697-
file reads from ch_clipped_reads_circularmapper.mix(ch_read_files_converted_mapping_cm)
750+
set val(name), file(reads) from ch_clipped_reads_circularmapper.mix(ch_read_files_converted_mapping_cm)
698751
file index from ch_circularmapper_indices.first()
699752

700753
output:
@@ -703,17 +756,31 @@ process circularmapper{
703756

704757
script:
705758
filter = "${params.circularfilter}" ? '' : '-f true -x false'
706-
prefix = reads[0].toString() - ~/(_R1)?(\.combined\.)?(prefixed)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/
759+
707760
fasta = "${index}/*_*.fasta"
708761
size = "${params.large_ref}" ? '-c' : ''
709762

763+
//PE data without merging, or PE data without any AR applied (same condition as the bwa process)
if (!params.singleEnd && (params.skip_collapse || params.skip_adapterremoval)){
764+
prefix = reads[0].toString().tokenize('.')[0]
710765
"""
711-
bwa aln -t ${task.cpus} $fasta $reads -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f "${reads.baseName}.sai"
712-
bwa samse -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta "${reads.baseName}".sai $reads > tmp.out
766+
bwa aln -t ${task.cpus} $fasta ${reads[0]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r1.sai
767+
bwa aln -t ${task.cpus} $fasta ${reads[1]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r2.sai
768+
bwa sampe -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta ${prefix}.r1.sai ${prefix}.r2.sai ${reads[0]} ${reads[1]} > tmp.out
769+
realignsamfile -e ${params.circularextension} -i tmp.out -r $fasta $filter
770+
samtools sort -@ ${task.cpus} -O bam tmp_realigned.bam > ${prefix}.sorted.bam
771+
samtools index "${size}" ${prefix}.sorted.bam
772+
"""
773+
} else {
774+
prefix = reads[0].toString().tokenize('.')[0]
775+
"""
776+
bwa aln -t ${task.cpus} $fasta $reads -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.sai
777+
bwa samse -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta ${prefix}.sai $reads > tmp.out
713778
realignsamfile -e ${params.circularextension} -i tmp.out -r $fasta $filter
714779
samtools sort -@ ${task.cpus} -O bam tmp_realigned.bam > "${prefix}".sorted.bam
715780
samtools index "${size}" "${prefix}".sorted.bam
716781
"""
782+
}
783+
717784
}
718785

719786
process bwamem {
@@ -723,7 +790,7 @@ process bwamem {
723790
when: params.bwamem && !params.circularmapper
724791

725792
input:
726-
file(reads) from ch_clipped_reads_bwamem.mix(ch_read_files_converted_mapping_bwamem)
793+
set val(name), file(reads) from ch_clipped_reads_bwamem.mix(ch_read_files_converted_mapping_bwamem)
727794
file index from ch_bwa_index_bwamem.first()
728795

729796
output:
@@ -735,10 +802,19 @@ process bwamem {
735802
prefix = reads[0].toString() - ~/(_R1)?(\.combined\.)?(prefixed)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/
736803
fasta = "${index}/*.fasta"
737804
size = "${params.large_ref}" ? '-c' : ''
805+
806+
//PE data without merging, or PE data without any AR applied (same condition as the bwa process)
if (!params.singleEnd && (params.skip_collapse || params.skip_adapterremoval)){
807+
"""
808+
bwa mem -t ${task.cpus} $fasta ${reads[0]} ${reads[1]} -R "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" | samtools sort -@ ${task.cpus} -O bam - > "${prefix}".sorted.bam
809+
samtools index "${size}" -@ ${task.cpus} "${prefix}".sorted.bam
810+
"""
811+
} else {
738812
"""
739813
bwa mem -t ${task.cpus} $fasta $reads -R "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" | samtools sort -@ ${task.cpus} -O bam - > "${prefix}".sorted.bam
740-
samtools index "${size}" -@ ${task.cpus} "${prefix}".sorted.bam
814+
samtools index "${size}" -@ ${task.cpus} "${prefix}".sorted.bam
741815
"""
816+
}
817+
742818
}
743819

744820
/*

nextflow.config

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ params {
1414

1515
//Pipeline options
1616
aligner = 'bwa'
17+
1718
saveReference = false
1819
saveTrimmed = true
1920
saveAlignedIntermediates = false
@@ -30,6 +31,11 @@ params {
3031
complexity_filter_poly_g_min = 10
3132
trim_bam = false
3233

34+
//Skipping adapterremoval, trimming or collapsing defaults
35+
skip_adapterremoval = false
36+
skip_trim = false
37+
skip_collapse = false
38+
3339
// AWS Batch
3440
awsqueue = false
3541
awsregion = 'eu-west-1'
@@ -38,6 +44,7 @@ params {
3844
custom_config_version = 'master'
3945
}
4046

47+
4148
// Load base.config by default for all pipelines
4249
includeConfig 'conf/base.config'
4350

0 commit comments

Comments
 (0)