Skip to content

Commit 6b02a40

Browse files
authored
Merge pull request #1098 from nf-core/metagenomics-pairedend
dsl2: metagenomics uncollapsed paired end
2 parents 059c33a + eadaeb2 commit 6b02a40

11 files changed

Lines changed: 416 additions & 218 deletions

File tree

conf/modules.config

Lines changed: 159 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -406,7 +406,7 @@ process {
406406
tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" }
407407
ext.args = [
408408
"-q ${params.bamfiltering_mappingquality}",
409-
params.bamfiltering_retainunmappedgenomicbam ? '' : "-F ${params.bamfilter_genomicbamfilterflag}",
409+
params.bamfiltering_retainunmappedgenomicbam ? '' : "-F ${params.bamfiltering_genomicbamfilterflag}",
410410
].join(' ').trim()
411411
ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_filtered" }
412412
publishDir = [
@@ -448,50 +448,45 @@ process {
448448
]
449449
}
450450

451-
withName: SAMTOOLS_FASTQ_MAPPED {
451+
withName: SAMTOOLS_FASTQ_SAVEBAMFILTERINGREADS {
452452
tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" }
453-
ext.args = [
454-
params.metagenomics_input == 'all' ? '' : '-F 4',
455-
].join(' ').trim()
456-
ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_mapped" }
453+
ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_bamfiltering_fastq" }
457454
publishDir = [
458455
[
459456
// data
460457
path: { "${params.outdir}/read_filtering/fastq/data/" },
461458
mode: params.publish_dir_mode,
462459
pattern: '*.fastq.gz',
463-
enabled: params.bamfiltering_generatemappedfastq
460+
enabled: params.bamfiltering_generatefastq
464461
]
465462
]
466463
}
467464

468-
withName: SAMTOOLS_FASTQ_UNMAPPED {
465+
withName: SAMTOOLS_FASTQ_METAGENOMICS {
469466
tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" }
470467
ext.args = [
471-
'-f 4',
468+
params.metagenomics_input == 'mapped' ? '-F 4': '',
469+
params.metagenomics_input == 'unmapped' ? '-f 4': '',
470+
// 'all' sets neither -F nor -f, so every read (mapped and unmapped) is written to FASTQ
472471
].join(' ').trim()
473-
ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_unmapped" }
472+
ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_metagenomics_fastq_${params.metagenomics_input}" }
474473
publishDir = [
475474
[
476475
// data
477476
path: { "${params.outdir}/read_filtering/fastq/data/" },
478477
mode: params.publish_dir_mode,
479478
pattern: '*.fastq.gz',
480-
enabled: params.bamfiltering_generateunmappedfastq
479+
enabled: params.metagenomics_input_savefastq
481480
]
482481
]
483482
}
484483

485-
withName: 'CAT_FASTQ_UNMAPPED|CAT_FASTQ_MAPPED' {
484+
withName: 'CAT_FASTQ_METAGENOMICS' {
486485
tag = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" }
487486
ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" }
488487
publishDir = [
489488
[
490-
// data
491-
path: { "${params.outdir}/read_filtering/fastq/data/" },
492-
mode: params.publish_dir_mode,
493-
pattern: '*.fastq.gz',
494-
enabled: params.preprocessing_savepreprocessedreads
489+
enabled: false // NO publishing of concatenated fastq files for metagenomics, only outputs from SAMTOOLS_FASTQ_METAGENOMICS
495490
]
496491
]
497492
}
@@ -927,6 +922,153 @@ process {
927922
]
928923
}
929924

925+
withName: BBMAP_BBDUK {
926+
tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" }
927+
ext.args = { "entropymask=f entropy=${params.metagenomics_complexity_entropy}" }
928+
ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_complexity" }
929+
publishDir = [
930+
path: { "${params.outdir}/metagenomics/complexity_filter/bbduk/" },
931+
mode: params.publish_dir_mode,
932+
pattern: '*.{fastq.gz,log}',
933+
enabled: params.metagenomics_complexity_savefastq
934+
]
935+
}
936+
937+
withName: MALT_RUN {
938+
ext.args = [
939+
"-m ${params.metagenomics_malt_mode}",
940+
"-at ${params.metagenomics_malt_alignmentmode}",
941+
"-top ${params.metagenomics_malt_toppercent}",
942+
"-id ${params.metagenomics_malt_minpercentidentity}",
943+
"-mq ${params.metagenomics_malt_maxqueries}",
944+
"--memoryMode ${params.metagenomics_malt_memorymode}",
945+
params.metagenomics_malt_minsupportmode == "percent" ? "-supp ${params.metagenomics_malt_minsupportpercent}" : "-sup ${params.metagenomics_malt_minsupportreads}",
946+
params.metagenomics_malt_savereads ? "--alignments ./" : ""
947+
].join(' ').trim()
948+
publishDir = [
949+
path: { "${params.outdir}/metagenomics/profiling/malt/" },
950+
mode: params.publish_dir_mode,
951+
pattern: '*.{rma6,log,sam.gz}'
952+
]
953+
ext.prefix = { "${meta.label}_${meta.id}-run" }
954+
}
955+
956+
withName: CAT_CAT_MALT {
957+
ext.prefix = { "${meta.id}_runtime_log_concatenated.log" }
958+
publishDir = [
959+
path: { "${params.outdir}/metagenomics/profiling/malt/" },
960+
mode: params.publish_dir_mode,
961+
pattern: '*.{log}'
962+
]
963+
}
964+
965+
withName: KRAKEN2_KRAKEN2 {
966+
tag = { "${meta.sample_id}|single_end_mode_${meta.single_end}" }
967+
ext.args = [
968+
params.metagenomics_kraken2_saveminimizers ? "--report-minimizer-data" : ""
969+
].join(' ').trim()
970+
ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" }
971+
publishDir = [
972+
path: { "${params.outdir}/metagenomics/profiling/kraken2/" },
973+
mode: params.publish_dir_mode,
974+
pattern: '*.{txt,fastq.gz}'
975+
]
976+
}
977+
978+
withName: KRAKENUNIQ_PRELOADEDKRAKENUNIQ {
979+
tag = { "single_end_mode_${meta.single_end}" }
980+
publishDir = [
981+
path: { "${params.outdir}/metagenomics/profiling/krakenuniq/" },
982+
mode: params.publish_dir_mode,
983+
pattern: '*.{txt,fastq.gz}'
984+
]
985+
ext.prefix = { "${meta.single_end}" }
986+
}
987+
988+
withName: METAPHLAN_METAPHLAN {
989+
publishDir = [
990+
path: { "${params.outdir}/metagenomics/profiling/metaphlan/" },
991+
mode: params.publish_dir_mode,
992+
pattern: '*.{biom,txt}'
993+
]
994+
ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" }
995+
}
996+
997+
withName: MALTEXTRACT {
998+
ext.args = { [
999+
"-f ${params.metagenomics_maltextract_filter}",
1000+
"-a ${params.metagenomics_maltextract_toppercent}",
1001+
"--minPI ${params.metagenomics_maltextract_minpercentidentity}",
1002+
params.metagenomics_maltextract_destackingoff ? "--destackingOff" : "",
1003+
params.metagenomics_maltextract_downsamplingoff ? "--downSampOff" : "",
1004+
params.metagenomics_maltextract_duplicateremovaloff ? "--dupRemOff" : "",
1005+
params.metagenomics_maltextract_matches ? "--matches" : "",
1006+
params.metagenomics_maltextract_megansummary ? "--meganSummary" : "",
1007+
params.metagenomics_maltextract_usetopalignment ? "--useTopAlignment" : "",
1008+
meta.strandedness == "single" ? '--singleStranded' : '',
1009+
].join(' ').trim() }
1010+
publishDir = [
1011+
path: { "${params.outdir}/metagenomics/postprocessing/maltextract/" },
1012+
mode: params.publish_dir_mode,
1013+
pattern: 'results',
1014+
saveAs: { "${meta.id}" }
1015+
]
1016+
}
1017+
1018+
withName: MEGAN_RMA2INFO {
1019+
tag = {"${meta.id}"}
1020+
ext.args = "-c2c Taxonomy"
1021+
ext.prefix = { "${meta.id}" }
1022+
publishDir = [
1023+
path: { "${params.outdir}/metagenomics/postprocessing/megan_summaries/" },
1024+
mode: params.publish_dir_mode,
1025+
pattern: '*.{txt.gz,megan}'
1026+
]
1027+
}
1028+
1029+
withName: AMPS {
1030+
publishDir = [
1031+
path: { "${params.outdir}/metagenomics/postprocessing/maltextract/" },
1032+
mode: params.publish_dir_mode,
1033+
pattern: 'results'
1034+
]
1035+
errorStrategy = 'ignore' // required as it fails the run for low reads: https://github.com/rhuebler/HOPS/issues/9
1036+
}
1037+
1038+
withName: TAXPASTA_MERGE {
1039+
publishDir = [
1040+
path: { "${params.outdir}/metagenomics/postprocessing/taxpasta/" },
1041+
mode: params.publish_dir_mode,
1042+
pattern: '*.{csv,tsv,ods,xlsx,arrow,parquet,biom}'
1043+
]
1044+
ext.args = { "--profiler ${meta.profiler} --output ${meta.profiler}_taxpasta_table.tsv" }
1045+
}
1046+
1047+
withName: TAXPASTA_STANDARDISE {
1048+
publishDir = [
1049+
path: { "${params.outdir}/metagenomics/postprocessing/taxpasta/" },
1050+
mode: params.publish_dir_mode,
1051+
pattern: '*.{csv,tsv,ods,xlsx,arrow,parquet,biom}'
1052+
]
1053+
ext.args = { "--profiler ${meta.profiler} --output ${meta.profiler}_taxpasta_table.tsv" }
1054+
}
1055+
1056+
//
1057+
// QUALIMAP
1058+
//
1059+
1060+
withName: 'QUALIMAP_BAMQC_WITHBED|QUALIMAP_BAMQC_NOBED' {
1061+
tag = { "${meta.reference}|${meta.sample_id}" }
1062+
publishDir = [
1063+
path: { "${params.outdir}/mapstats/qualimap/${meta.reference}/" },
1064+
mode: params.publish_dir_mode,
1065+
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
1066+
]
1067+
}
1068+
1069+
//
1070+
// DAMAGE CALCULATION
1071+
//
9301072
withName: DAMAGEPROFILER {
9311073
tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" }
9321074
ext.args = [

docs/development/manual_tests.md

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -721,7 +721,7 @@ HOP001 ERR8958750 0 4 paired double half /workspace/eager/testing/test_data/ERR8
721721
HOP001 ERR8958751 0 2 paired double half /workspace/eager/testing/test_data/ERR8958751_1.fastq.gz_reduced.fastq.gz /workspace/eager/testing/test_data/ERR8958751_2.fastq.gz_reduced.fastq.gz NA NA
722722
HOP001 ERR8958752 0 2 paired double half /workspace/eager/testing/test_data/ERR8958752_1.fastq.gz_reduced.fastq.gz /workspace/eager/testing/test_data/ERR8958752_2.fastq.gz_reduced.fastq.gz NA NA
723723
HOP001 ERR8958753 0 2 paired double half /workspace/eager/testing/test_data/ERR8958753_1.fastq.gz_reduced.fastq.gz /workspace/eager/testing/test_data/ERR8958753_2.fastq.gz_reduced.fastq.gz NA NA
724-
HOP001 ERR8958754 0 2 paired double none /workspace/eager/testing/test_data/ERR8958754_1.fastq.gz_reduced.fastq.gz /workspace/eager/testing/test_data/ERR8958754_2.fastq.gz_reduced.fastq.gz NA NA" | sed 's/ /\t/g' > test.tsv
724+
HOP001 ERR8958754 0 2 paired double none /workspace/eager/testing/test_data/ERR8958754_1.fastq.gz_reduced.fastq.gz /workspace/eager/testing/test_data/ERR8958754_2.fastq.gz_reduced.fastq.gz NA NA" | sed 's/NA/ /g' | sed 's/ /\t/g' > test.tsv
725725

726726
nextflow run ../main.nf -profile docker \
727727
--input test.tsv \
@@ -738,6 +738,16 @@ nextflow run ../main.nf -profile docker \
738738
--metagenomics_malt_group_size 3
739739
```
740740
741+
# kraken2
742+
743+
nextflow run main.nf -profile docker \
744+
--input testing/test.tsv \
745+
--outdir ./out \
746+
--run_metagenomics \
747+
--metagenomics_profiling_tool kraken2 \
748+
--metagenomics_profiling_database /workspace/eager/testing/eager_test.tar.gz \
749+
--preprocessing_skippairmerging
750+
741751
## Mapping statistics
742752
743753
### ENDOSPY

modules.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@
187187
},
188188
"krakenuniq/preloadedkrakenuniq": {
189189
"branch": "master",
190-
"git_sha": "a6eb17f65b3ee5761c25c075a6166c9f76733cee",
190+
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
191191
"installed_by": ["modules"]
192192
},
193193
"malt/run": {

modules/nf-core/krakenuniq/preloadedkrakenuniq/environment.yml

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)