Skip to content

Commit c0c2997

Browse files
authored
Merge pull request #768 from nf-core/adapter-list
Add AR adapterList removal
2 parents 5ccc1c3 + 4b07aa5 commit c0c2997

5 files changed

Lines changed: 39 additions & 8 deletions

File tree

.github/workflows/ci.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,12 @@ jobs:
102102
- name: ADAPTERREMOVAL Run the basic pipeline with preserve5p end and merged reads only options
103103
run: |
104104
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --preserve5p --mergedonly
105+
- name: ADAPTER LIST Run the basic pipeline using an adapter list
106+
run: |
107+
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --clip_adapters_list 'https://github.com/nf-core/test-datasets/raw/eager/databases/adapters/adapter-list.txt'
108+
- name: ADAPTER LIST Run the basic pipeline using an adapter list, skipping adapter removal
109+
run: |
110+
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --clip_adapters_list 'https://github.com/nf-core/test-datasets/raw/eager/databases/adapters/adapter-list.txt' --skip_adapterremoval
105111
- name: MAPPER_CIRCULARMAPPER Test running with CircularMapper
106112
run: |
107113
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --mapper 'circularmapper' --circulartarget 'NC_007596.2'

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
77

88
### `Added`
99

10+
- [#651](https://github.com/nf-core/eager/issues/651) - Adds removal of adapters specified in an AdapterRemoval adapter list file
11+
1012
### `Fixed`
1113

1214
- [#771](https://github.com/nf-core/eager/issues/771) Remove legacy code

main.nf

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,20 @@ if( params.bt2_index && params.mapper == 'bowtie2' ){
227227
bwa_index_bwamem = Channel.empty()
228228
}
229229

230+
// Adapter removal adapter-list setup
231+
if ( !params.clip_adapters_list ) {
232+
Channel
233+
.fromPath("$projectDir/assets/nf-core_eager_dummy2.txt", checkIfExists: true)
234+
.ifEmpty { exit 1, "[nf-core/eager] error: adapters list file not found. Please check input. Supplied: --clip_adapters_list '${params.clip_adapters_list}'." }
235+
.into {ch_adapterlist}
236+
} else {
237+
Channel
238+
.fromPath("${params.clip_adapters_list}", checkIfExists: true)
239+
.ifEmpty { exit 1, "[nf-core/eager] error: adapters list file not found. Please check input. Supplied: --clip_adapters_list '${params.clip_adapters_list}'." }
240+
.into {ch_adapterlist}
241+
}
242+
243+
230244
// SexDetermination channel set up and bedfile validation
231245
if (!params.sexdeterrmine_bedfile) {
232246
ch_bed_for_sexdeterrmine = Channel.fromPath("$projectDir/assets/nf-core_eager_dummy.txt")
@@ -765,25 +779,27 @@ process adapter_removal {
765779

766780
input:
767781
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(r1), file(r2) from ch_fastp_for_adapterremoval
782+
path adapterlist from ch_adapterlist.collect().dump(tag: "Adapter list")
768783

769784
output:
770785
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("output/*{combined.fq,.se.truncated,pair1.truncated}.gz") into ch_output_from_adapterremoval_r1
771786
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("output/*pair2.truncated.gz") optional true into ch_output_from_adapterremoval_r2
772787
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("output/*.settings") into ch_adapterremoval_logs
773-
788+
774789
when:
775790
!params.skip_adapterremoval
776791

777792
script:
778-
base = "${r1.baseName}_L${lane}"
793+
def base = "${r1.baseName}_L${lane}"
794+
def adapters_to_remove = !params.clip_adapters_list ? "--adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor}" : "--adapter-list ${adapterlist}"
779795
//This checks whether we skip trimming and defines a variable respectively
780796
def preserve5p = params.preserve5p ? '--preserve5p' : '' // applies to any AR command - doesn't affect output file combination
781797

782798
if ( seqtype == 'PE' && !params.skip_collapse && !params.skip_trim && !params.mergedonly && !params.preserve5p ) {
783799
"""
784800
mkdir -p output
785801
786-
AdapterRemoval --file1 ${r1} --file2 ${r2} --basename ${base}.pe --gzip --threads ${task.cpus} --qualitymax ${params.qualitymax} --collapse ${preserve5p} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap}
802+
AdapterRemoval --file1 ${r1} --file2 ${r2} --basename ${base}.pe --gzip --threads ${task.cpus} --qualitymax ${params.qualitymax} --collapse ${preserve5p} --trimns --trimqualities ${adapters_to_remove} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap}
787803
788804
cat *.collapsed.gz *.collapsed.truncated.gz *.singleton.truncated.gz *.pair1.truncated.gz *.pair2.truncated.gz > output/${base}.pe.combined.tmp.fq.gz
789805
@@ -797,7 +813,7 @@ process adapter_removal {
797813
"""
798814
mkdir -p output
799815
800-
AdapterRemoval --file1 ${r1} --file2 ${r2} --basename ${base}.pe --gzip --threads ${task.cpus} --qualitymax ${params.qualitymax} --collapse ${preserve5p} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap}
816+
AdapterRemoval --file1 ${r1} --file2 ${r2} --basename ${base}.pe --gzip --threads ${task.cpus} --qualitymax ${params.qualitymax} --collapse ${preserve5p} --trimns --trimqualities ${adapters_to_remove} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap}
801817
802818
cat *.collapsed.gz *.singleton.truncated.gz *.pair1.truncated.gz *.pair2.truncated.gz > output/${base}.pe.combined.tmp.fq.gz
803819
@@ -810,7 +826,7 @@ process adapter_removal {
810826
} else if ( seqtype == 'PE' && !params.skip_collapse && !params.skip_trim && params.mergedonly && !params.preserve5p ) {
811827
"""
812828
mkdir -p output
813-
AdapterRemoval --file1 ${r1} --file2 ${r2} --basename ${base}.pe --gzip --threads ${task.cpus} --qualitymax ${params.qualitymax} --collapse ${preserve5p} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap}
829+
AdapterRemoval --file1 ${r1} --file2 ${r2} --basename ${base}.pe --gzip --threads ${task.cpus} --qualitymax ${params.qualitymax} --collapse ${preserve5p} --trimns --trimqualities ${adapters_to_remove} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap}
814830
815831
cat *.collapsed.gz *.collapsed.truncated.gz > output/${base}.pe.combined.tmp.fq.gz
816832
@@ -823,7 +839,7 @@ process adapter_removal {
823839
} else if ( seqtype == 'PE' && !params.skip_collapse && !params.skip_trim && params.mergedonly && params.preserve5p ) {
824840
"""
825841
mkdir -p output
826-
AdapterRemoval --file1 ${r1} --file2 ${r2} --basename ${base}.pe --gzip --threads ${task.cpus} --qualitymax ${params.qualitymax} --collapse ${preserve5p} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap}
842+
AdapterRemoval --file1 ${r1} --file2 ${r2} --basename ${base}.pe --gzip --threads ${task.cpus} --qualitymax ${params.qualitymax} --collapse ${preserve5p} --trimns --trimqualities ${adapters_to_remove} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap}
827843
828844
cat *.collapsed.gz > output/${base}.pe.combined.tmp.fq.gz
829845
@@ -864,15 +880,15 @@ process adapter_removal {
864880
} else if ( seqtype == 'PE' && params.skip_collapse && !params.skip_trim ) {
865881
"""
866882
mkdir -p output
867-
AdapterRemoval --file1 ${r1} --file2 ${r2} --basename ${base}.pe --gzip --threads ${task.cpus} --qualitymax ${params.qualitymax} ${preserve5p} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap}
883+
AdapterRemoval --file1 ${r1} --file2 ${r2} --basename ${base}.pe --gzip --threads ${task.cpus} --qualitymax ${params.qualitymax} ${preserve5p} --trimns --trimqualities ${adapters_to_remove} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap}
868884
869885
mv ${base}.pe.pair*.truncated.gz *.settings output/
870886
"""
871887
} else if ( seqtype != 'PE' && !params.skip_trim ) {
872888
//SE, collapse not possible, trim reads only
873889
"""
874890
mkdir -p output
875-
AdapterRemoval --file1 ${r1} --basename ${base}.se --gzip --threads ${task.cpus} --qualitymax ${params.qualitymax} ${preserve5p} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap}
891+
AdapterRemoval --file1 ${r1} --basename ${base}.se --gzip --threads ${task.cpus} --qualitymax ${params.qualitymax} ${preserve5p} --trimns --trimqualities ${adapters_to_remove} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap}
876892
mv *.settings *.se.truncated.gz output/
877893
"""
878894
} else if ( seqtype != 'PE' && params.skip_trim ) {

nextflow.config

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ params {
6666
//Read clipping and merging parameters
6767
clip_forward_adaptor = 'AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC'
6868
clip_reverse_adaptor = 'AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTA'
69+
clip_adapters_list = null
6970
clip_readlength = 30
7071
clip_min_read_quality = 20
7172
min_adap_overlap = 1

nextflow_schema.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -464,6 +464,12 @@
464464
"fa_icon": "fas fa-cut",
465465
"help_text": "Defines the adapter sequence to be used for the reverse read in paired end sequencing projects. This is set to `'AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTA'` by default.\n\n> Modifies AdapterRemoval parameter: `--adapter2`"
466466
},
467+
"clip_adapters_list": {
468+
"type": "string",
469+
"description": "Path to AdapterRemoval adapter list file. Overrides `--clip_*_adaptor` parameters",
470+
"fa_icon": "fas fa-cut",
471+
"help_text": "Allows you to supply a file containing a list of adapter (combinations) to remove from all files. **Overrides** the `--clip_*_adaptor` parameters. The first column represents the forward strand, the second column the reverse strand. You must supply all possible combinations, one per line, and this list is applied to all files. See [AdapterRemoval documentation](https://adapterremoval.readthedocs.io/en/latest/manpage.html) for more information.\n\n> Modifies AdapterRemoval parameter: `--adapter-list`"
472+
},
467473
"clip_readlength": {
468474
"type": "integer",
469475
"default": 30,

0 commit comments

Comments
 (0)