Skip to content

Commit afd9455

Browse files
authored
Merge branch 'dev' into dsl2-preserve-damagemanipulation
2 parents 63ba76f + 56bfb0f commit afd9455

6 files changed

Lines changed: 117 additions & 33 deletions

File tree

conf/modules.config

Lines changed: 40 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -361,29 +361,56 @@ process {
361361
//
362362
// BAM INPUT
363363
//
364-
withName: 'SAMTOOLS_FLAGSTATS_BAM_INPUT' {
365-
// NOTE This step becomes obsolete once a lane-merging step is added for input BAMs.
366-
// TODO Once a lane-merging step is added for input BAMs, the lane should be dropped from this tag.
364+
withName: SAMTOOLS_INDEX_BAM_INPUT {
367365
tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" }
368-
ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.lane}_${meta.reference}" }
369366
publishDir = [
370-
[
371-
// stats
372-
path: { "${params.outdir}/mapping/bam_input/stats/" },
373-
mode: params.publish_dir_mode,
374-
pattern: '*.flagstat'
375-
]
367+
enabled: false
376368
]
377369
}
378370

379-
withName: SAMTOOLS_INDEX_BAM_INPUT {
380-
// NOTE This step becomes obsolete once a lane-merging step is added for input BAMs.
381-
tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" }
371+
withName: SAMTOOLS_MERGE_LANES_BAMINPUT {
372+
tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" }
373+
ext.args = { params.run_fastq_sharding ? "-c -p" : "" }
374+
ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" }
382375
publishDir = [
383376
enabled: false
384377
]
385378
}
386379

380+
withName: SAMTOOLS_SORT_MERGED_LANES_BAMINPUT {
381+
tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" }
382+
ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_sorted" }
383+
publishDir = [
384+
// data
385+
path: { "${params.outdir}/mapping/${params.mapping_tool}/data/" },
386+
mode: params.publish_dir_mode,
387+
pattern: '*.{bam}'
388+
]
389+
}
390+
391+
withName: SAMTOOLS_INDEX_MERGED_LANES_BAMINPUT {
392+
tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" }
393+
ext.args = { params.fasta_largeref ? "-c" : "" }
394+
ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" }
395+
publishDir = [
396+
// data
397+
path: { "${params.outdir}/mapping/${params.mapping_tool}/data/" },
398+
mode: params.publish_dir_mode,
399+
pattern: '*.{bai,csi}'
400+
]
401+
}
402+
403+
withName: SAMTOOLS_FLAGSTAT_MERGED_LANES_BAMINPUT {
404+
tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" }
405+
ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_sorted" }
406+
publishDir = [
407+
// stats
408+
path: { "${params.outdir}/mapping/bam_input/stats/" },
409+
mode: params.publish_dir_mode,
410+
pattern: '*.flagstat'
411+
]
412+
}
413+
387414
//
388415
// BAM FILTERING
389416
//

conf/test_krakenuniq.config

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,5 +32,5 @@ params {
3232
// Metagenomics
3333
run_metagenomics = true
3434
metagenomics_profiling_tool = 'krakenuniq'
35-
metagenomics_profiling_database = params.pipelines_testdata_base_path + 'eager/databases/krakenuniq/testdb-krakenuniq.tar.gz'
35+
metagenomics_profiling_database = params.pipelines_testdata_base_path + 'eager/databases/krakenuniq/eager3-mammoth-minimal.tar.gz'
3636
}

conf/test_metaphlan.config

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ params {
2424
config_profile_description = 'Minimal test dataset to check the metagenomics metaphlan4 pipeline function'
2525

2626
// Input data
27-
input = params.pipelines_testdata_base_path + 'eager/testdata/Mammoth/samplesheet_v3.tsv'
27+
input = params.pipelines_testdata_base_path + 'eager/testdata/Mammoth/samplesheet_v3_TOYMETAPHLAN.csv'
2828

2929
// Genome references
3030
fasta = params.pipelines_testdata_base_path + 'eager/reference/Mammoth/Mammoth_MT_Krause.fasta'
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
//
2+
// Merge lanes of input BAMs per library, then sort, index and flagstat the resulting BAMs
3+
//
4+
5+
include { SAMTOOLS_MERGE as SAMTOOLS_MERGE_LANES_BAMINPUT } from '../../modules/nf-core/samtools/merge/main'
6+
include { SAMTOOLS_SORT as SAMTOOLS_SORT_MERGED_LANES_BAMINPUT } from '../../modules/nf-core/samtools/sort/main'
7+
include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_MERGED_LANES_BAMINPUT } from '../../modules/nf-core/samtools/index/main'
8+
include { SAMTOOLS_FLAGSTAT as SAMTOOLS_FLAGSTAT_MERGED_LANES_BAMINPUT } from '../../modules/nf-core/samtools/flagstat/main'
9+
10+
workflow MERGE_LANES_INPUTBAM {
11+
take:
12+
bams // [ [meta], [bams] ]
13+
14+
main:
15+
ch_versions = Channel.empty()
16+
ch_multiqc_files = Channel.empty()
17+
18+
ch_input_for_lane_merge = bams
19+
.map { meta, bam -> [ meta.clone().findAll{ it.key !in ['lane', 'colour_chemistry', 'shard_number'] }, bam ] }
20+
.groupTuple()
21+
.branch {
22+
meta, bam ->
23+
merge: bam.size() > 1
24+
skip: true
25+
}
26+
27+
SAMTOOLS_MERGE_LANES_BAMINPUT ( ch_input_for_lane_merge.merge, [[], []], [[], []] )
28+
ch_versions = ch_versions.mix( SAMTOOLS_MERGE_LANES_BAMINPUT.out.versions ) // mix() returns a new channel; reassign so the versions are actually collected
29+
30+
// Then mix back merged and single lane libraries for everything downstream
31+
ch_mergedlanes_for_sorting = ch_input_for_lane_merge.skip
32+
.mix( SAMTOOLS_MERGE_LANES_BAMINPUT.out.bam )
33+
34+
SAMTOOLS_SORT_MERGED_LANES_BAMINPUT ( ch_mergedlanes_for_sorting )
35+
ch_mapped_bam = SAMTOOLS_SORT_MERGED_LANES_BAMINPUT.out.bam
36+
ch_versions = ch_versions.mix( SAMTOOLS_SORT_MERGED_LANES_BAMINPUT.out.versions ) // mix() returns a new channel; reassign so the versions are actually collected
37+
38+
SAMTOOLS_INDEX_MERGED_LANES_BAMINPUT( ch_mapped_bam )
39+
ch_mapped_bai = params.fasta_largeref ? SAMTOOLS_INDEX_MERGED_LANES_BAMINPUT.out.csi : SAMTOOLS_INDEX_MERGED_LANES_BAMINPUT.out.bai
40+
ch_versions = ch_versions.mix( SAMTOOLS_INDEX_MERGED_LANES_BAMINPUT.out.versions ) // mix() returns a new channel; reassign so the versions are actually collected
41+
42+
ch_input_for_flagstat = ch_mapped_bam.join( ch_mapped_bai, failOnMismatch: true ) // use the bai/csi channel selected above; joining on .out.bai alone fails when params.fasta_largeref produces a CSI index
43+
44+
SAMTOOLS_FLAGSTAT_MERGED_LANES_BAMINPUT( ch_input_for_flagstat )
45+
ch_versions = ch_versions.mix( SAMTOOLS_FLAGSTAT_MERGED_LANES_BAMINPUT.out.versions.first() ) // mix() returns a new channel; reassign so the versions are actually collected
46+
ch_multiqc_files = ch_multiqc_files.mix(SAMTOOLS_FLAGSTAT_MERGED_LANES_BAMINPUT.out.flagstat )
47+
48+
emit:
49+
bam = ch_mapped_bam // [ [ meta ], bam ]
50+
bai = ch_mapped_bai // [ [ meta ], bai/csi ]
51+
flagstat = SAMTOOLS_FLAGSTAT_MERGED_LANES_BAMINPUT.out.flagstat // [ [ meta ], stats ]
52+
mqc = ch_multiqc_files
53+
versions = ch_versions
54+
55+
}

subworkflows/nf-core/utils_nfcore_pipeline/main.nf

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

workflows/eager.nf

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ include { addNewMetaFromAttributes } from '../subwork
2424
include { REFERENCE_INDEXING } from '../subworkflows/local/reference_indexing'
2525
include { PREPROCESSING } from '../subworkflows/local/preprocessing'
2626
include { MAP } from '../subworkflows/local/map'
27+
include { MERGE_LANES_INPUTBAM } from '../subworkflows/local/merge_lanes_inputbam'
2728
include { FILTER_BAM } from '../subworkflows/local/bamfiltering.nf'
2829
include { DEDUPLICATE } from '../subworkflows/local/deduplicate'
2930
include { MANIPULATE_DAMAGE } from '../subworkflows/local/manipulate_damage'
@@ -55,7 +56,6 @@ include { FALCO } from '../modules
5556
include { MTNUCRATIO } from '../modules/nf-core/mtnucratio/main'
5657
include { HOST_REMOVAL } from '../modules/local/host_removal'
5758
include { ENDORSPY } from '../modules/nf-core/endorspy/main'
58-
include { SAMTOOLS_FLAGSTAT as SAMTOOLS_FLAGSTATS_BAM_INPUT } from '../modules/nf-core/samtools/flagstat/main'
5959
include { BEDTOOLS_COVERAGE as BEDTOOLS_COVERAGE_DEPTH } from '../modules/nf-core/bedtools/coverage/main'
6060
include { BEDTOOLS_COVERAGE as BEDTOOLS_COVERAGE_BREADTH } from '../modules/nf-core/bedtools/coverage/main'
6161
include { SAMTOOLS_VIEW_GENOME } from '../modules/local/samtools_view_genome.nf'
@@ -198,31 +198,31 @@ workflow EAGER {
198198
ch_bams_from_input = ch_samplesheet_bams.join(SAMTOOLS_INDEX_BAM_INPUT.out.csi)
199199
}
200200
else {
201-
ch_bams_from_input = ch_samplesheet_bams.join(SAMTOOLS_INDEX_BAM_INPUT.out.bai)
201+
ch_bams_from_input = ch_samplesheet_bams
202202
}
203203

204-
//
205-
// MODULE: flagstats of user supplied input BAMs
206-
//
207-
SAMTOOLS_FLAGSTATS_BAM_INPUT(ch_bams_from_input)
208-
ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTATS_BAM_INPUT.out.versions)
209-
ch_flagstat_input_bam = SAMTOOLS_FLAGSTATS_BAM_INPUT.out.flagstat
210-
}
211-
else {
212-
ch_bams_from_input = Channel.empty()
213-
ch_flagstat_input_bam = Channel.empty()
204+
// SUBWORKFLOW: Merging lanes for ch_bams_from_input
205+
206+
MERGE_LANES_INPUTBAM(ch_bams_from_input)
207+
ch_bams_from_input_lanemerged = MERGE_LANES_INPUTBAM.out.bam
208+
.join(MERGE_LANES_INPUTBAM.out.bai)
209+
ch_flagstat_bams_from_input_lanemerged = MERGE_LANES_INPUTBAM.out.flagstat
210+
211+
} else {
212+
ch_bams_from_input_lanemerged = Channel.empty()
213+
ch_flagstat_bams_from_input_lanemerged = Channel.empty()
214214
}
215215

216+
216217
//
217218
// SUBWORKFLOW: bam filtering (length, mapped/unmapped, quality etc.)
218219
//
219220

220221
if (params.run_bamfiltering || params.run_metagenomics) {
221222

222223
ch_mapped_for_bamfilter = MAP.out.bam
223-
.join(MAP.out.bai)
224-
.mix(ch_bams_from_input)
225-
224+
.join(MAP.out.bai)
225+
.mix(ch_bams_from_input_lanemerged)
226226
FILTER_BAM(ch_mapped_for_bamfilter)
227227
ch_bamfiltered_for_deduplication = FILTER_BAM.out.genomics
228228
ch_bamfiltered_for_metagenomics = FILTER_BAM.out.metagenomics
@@ -232,8 +232,8 @@ workflow EAGER {
232232
else {
233233
// TODO: more intuitive name for this?, since here we don't have filtered reads :P
234234
ch_bamfiltered_for_deduplication = MAP.out.bam
235-
.join(MAP.out.bai)
236-
.mix(ch_bams_from_input)
235+
.join(MAP.out.bai)
236+
.mix(ch_bams_from_input_lanemerged)
237237
}
238238

239239
ch_reads_for_deduplication = ch_bamfiltered_for_deduplication
@@ -392,7 +392,8 @@ workflow EAGER {
392392
// MODULE: ENDORSPY (raw, filtered, deduplicated)
393393
//
394394

395-
ch_flagstat_for_endorspy_raw = MAP.out.flagstat.mix(ch_flagstat_input_bam)
395+
ch_flagstat_for_endorspy_raw = MAP.out.flagstat
396+
.mix( ch_flagstat_bams_from_input_lanemerged )
396397

397398
if (params.run_bamfiltering & !params.skip_deduplication) {
398399
ch_for_endorspy = ch_flagstat_for_endorspy_raw

0 commit comments

Comments
 (0)