diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 213c2ac69..e7b4cfe26 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -102,6 +102,12 @@ jobs: - name: ADAPTERREMOVAL Run the basic pipeline with preserve5p end and merged reads only options run: | nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --preserve5p --mergedonly + - name: ADAPTER LIST Run the basic pipeline using an adapter list + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --clip_adapters_list 'https://github.com/nf-core/test-datasets/raw/eager/databases/adapters/adapter-list.txt' + - name: ADAPTER LIST Run the basic pipeline using an adapter list, skipping adapter removal + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --clip_adapters_list 'https://github.com/nf-core/test-datasets/raw/eager/databases/adapters/adapter-list.txt' --skip_adapterremoval - name: MAPPER_CIRCULARMAPPER Test running with CircularMapper run: | nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --mapper 'circularmapper' --circulartarget 'NC_007596.2' diff --git a/CHANGELOG.md b/CHANGELOG.md index 32523330c..24a8d0b79 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### `Added` +- [#651](https://github.com/nf-core/eager/issues/651) - Adds removal of adapters specified in an AdapterRemoval adapter list file + ### `Fixed` - [#771](https://github.com/nf-core/eager/issues/771) Remove legacy code diff --git a/main.nf b/main.nf index c7807d7a0..8430c8f71 100644 --- a/main.nf +++ b/main.nf @@ -227,6 +227,20 @@ if( params.bt2_index && params.mapper == 'bowtie2' ){ bwa_index_bwamem = Channel.empty() } +// Adapter removal adapter-list setup +if ( !params.clip_adapters_list ) { + Channel + .fromPath("$projectDir/assets/nf-core_eager_dummy2.txt", checkIfExists: true) + .ifEmpty { exit 1, "[nf-core/eager] error: adapters list file not found. Please check input. Supplied: --clip_adapters_list '${params.clip_adapters_list}'." } + .into {ch_adapterlist} +} else { + Channel + .fromPath("${params.clip_adapters_list}", checkIfExists: true) + .ifEmpty { exit 1, "[nf-core/eager] error: adapters list file not found. Please check input. Supplied: --clip_adapters_list '${params.clip_adapters_list}'." } + .into {ch_adapterlist} +} + + // SexDetermination channel set up and bedfile validation if (!params.sexdeterrmine_bedfile) { ch_bed_for_sexdeterrmine = Channel.fromPath("$projectDir/assets/nf-core_eager_dummy.txt") @@ -765,17 +779,19 @@ process adapter_removal { input: tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(r1), file(r2) from ch_fastp_for_adapterremoval + path adapterlist from ch_adapterlist.collect().dump(tag: "Adapter list") output: tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("output/*{combined.fq,.se.truncated,pair1.truncated}.gz") into ch_output_from_adapterremoval_r1 tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("output/*pair2.truncated.gz") optional true into ch_output_from_adapterremoval_r2 tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("output/*.settings") into ch_adapterremoval_logs - + when: !params.skip_adapterremoval script: - base = "${r1.baseName}_L${lane}" + def base = "${r1.baseName}_L${lane}" + def adapters_to_remove = !params.clip_adapters_list ? "--adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor}" : "--adapter-list ${adapterlist}" //This checks whether we skip trimming and defines a variable respectively def preserve5p = params.preserve5p ? '--preserve5p' : '' // applies to any AR command - doesn't affect output file combination @@ -783,7 +799,7 @@ process adapter_removal { """ mkdir -p output - AdapterRemoval --file1 ${r1} --file2 ${r2} --basename ${base}.pe --gzip --threads ${task.cpus} --qualitymax ${params.qualitymax} --collapse ${preserve5p} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} + AdapterRemoval --file1 ${r1} --file2 ${r2} --basename ${base}.pe --gzip --threads ${task.cpus} --qualitymax ${params.qualitymax} --collapse ${preserve5p} --trimns --trimqualities ${adapters_to_remove} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} cat *.collapsed.gz *.collapsed.truncated.gz *.singleton.truncated.gz *.pair1.truncated.gz *.pair2.truncated.gz > output/${base}.pe.combined.tmp.fq.gz @@ -797,7 +813,7 @@ process adapter_removal { """ mkdir -p output - AdapterRemoval --file1 ${r1} --file2 ${r2} --basename ${base}.pe --gzip --threads ${task.cpus} --qualitymax ${params.qualitymax} --collapse ${preserve5p} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} + AdapterRemoval --file1 ${r1} --file2 ${r2} --basename ${base}.pe --gzip --threads ${task.cpus} --qualitymax ${params.qualitymax} --collapse ${preserve5p} --trimns --trimqualities ${adapters_to_remove} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} cat *.collapsed.gz *.singleton.truncated.gz *.pair1.truncated.gz *.pair2.truncated.gz > output/${base}.pe.combined.tmp.fq.gz @@ -810,7 +826,7 @@ process adapter_removal { } else if ( seqtype == 'PE' && !params.skip_collapse && !params.skip_trim && params.mergedonly && !params.preserve5p ) { """ mkdir -p output - AdapterRemoval --file1 ${r1} --file2 ${r2} --basename ${base}.pe --gzip --threads ${task.cpus} --qualitymax ${params.qualitymax} --collapse ${preserve5p} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} + AdapterRemoval --file1 ${r1} --file2 ${r2} --basename ${base}.pe --gzip --threads ${task.cpus} --qualitymax ${params.qualitymax} --collapse ${preserve5p} --trimns --trimqualities ${adapters_to_remove} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} cat *.collapsed.gz *.collapsed.truncated.gz > output/${base}.pe.combined.tmp.fq.gz @@ -823,7 +839,7 @@ process adapter_removal { } else if ( seqtype == 'PE' && !params.skip_collapse && !params.skip_trim && params.mergedonly && params.preserve5p ) { """ mkdir -p output - AdapterRemoval --file1 ${r1} --file2 ${r2} --basename ${base}.pe --gzip --threads ${task.cpus} --qualitymax ${params.qualitymax} --collapse ${preserve5p} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} + AdapterRemoval --file1 ${r1} --file2 ${r2} --basename ${base}.pe --gzip --threads ${task.cpus} --qualitymax ${params.qualitymax} --collapse ${preserve5p} --trimns --trimqualities ${adapters_to_remove} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} cat *.collapsed.gz > output/${base}.pe.combined.tmp.fq.gz @@ -864,7 +880,7 @@ process adapter_removal { } else if ( seqtype == 'PE' && params.skip_collapse && !params.skip_trim ) { """ mkdir -p output - AdapterRemoval --file1 ${r1} --file2 ${r2} --basename ${base}.pe --gzip --threads ${task.cpus} --qualitymax ${params.qualitymax} ${preserve5p} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} + AdapterRemoval --file1 ${r1} --file2 ${r2} --basename ${base}.pe --gzip --threads ${task.cpus} --qualitymax ${params.qualitymax} ${preserve5p} --trimns --trimqualities ${adapters_to_remove} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} mv ${base}.pe.pair*.truncated.gz *.settings output/ """ @@ -872,7 +888,7 @@ process adapter_removal { //SE, collapse not possible, trim reads only """ mkdir -p output - AdapterRemoval --file1 ${r1} --basename ${base}.se --gzip --threads ${task.cpus} --qualitymax ${params.qualitymax} ${preserve5p} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} + AdapterRemoval --file1 ${r1} --basename ${base}.se --gzip --threads ${task.cpus} --qualitymax ${params.qualitymax} ${preserve5p} --trimns --trimqualities ${adapters_to_remove} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} mv *.settings *.se.truncated.gz output/ """ } else if ( seqtype != 'PE' && params.skip_trim ) { diff --git a/nextflow.config b/nextflow.config index 9f9b30fc8..d5cf79f5e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -66,6 +66,7 @@ params { //Read clipping and merging parameters clip_forward_adaptor = 'AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC' clip_reverse_adaptor = 'AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTA' + clip_adapters_list = null clip_readlength = 30 clip_min_read_quality = 20 min_adap_overlap = 1 diff --git a/nextflow_schema.json b/nextflow_schema.json index bf4ea4551..835cccb68 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -464,6 +464,12 @@ "fa_icon": "fas fa-cut", "help_text": "Defines the adapter sequence to be used for the reverse read in paired end sequencing projects. This is set to `'AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTA'` by default.\n\n> Modifies AdapterRemoval parameter: `--adapter2`" }, + "clip_adapters_list": { + "type": "string", + "description": "Path to AdapterRemoval adapter list file. Overrides `--clip_*_adaptor` parameters", + "fa_icon": "fas fa-cut", + "help_text": "Allows to supply a file with a list of adapter (combinations) to remove from all files. **Overrides** the `--clip_*_adaptor` parameters . First column represents forward strand, second column for reverse strand. You must supply all possibly combinations, one per line, and this list is applied to all files. See [AdapterRemoval documentation](https://adapterremoval.readthedocs.io/en/latest/manpage.html) for more information.\n\n> Modifies AdapterRemoval parameter: `--adapter-list`" + }, "clip_readlength": { "type": "integer", "default": 30,