Skip to content

Commit 56bfb0f

Browse files
authored
Merge pull request #1111 from jbv2/mergelane_mappedbaminput
merging lanes from baminput
2 parents 84dafb1 + aa94d5f commit 56bfb0f

3 files changed

Lines changed: 114 additions & 31 deletions

File tree

conf/modules.config

Lines changed: 40 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -361,29 +361,56 @@ process {
361361
//
362362
// BAM INPUT
363363
//
364-
withName: 'SAMTOOLS_FLAGSTATS_BAM_INPUT' {
365-
// NOTE This step becomes obsolete once a lane-merging step is added for input BAMs.
366-
// TODO Once a lane-merging step is added for input BAMs, the lane should be dropped from this tag.
364+
withName: SAMTOOLS_INDEX_BAM_INPUT {
367365
tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" }
368-
ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.lane}_${meta.reference}" }
369366
publishDir = [
370-
[
371-
// stats
372-
path: { "${params.outdir}/mapping/bam_input/stats/" },
373-
mode: params.publish_dir_mode,
374-
pattern: '*.flagstat'
375-
]
367+
enabled: false
376368
]
377369
}
378370

379-
withName: SAMTOOLS_INDEX_BAM_INPUT {
380-
// NOTE This step becomes obsolete once a lane-merging step is added for input BAMs.
381-
tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" }
371+
withName: SAMTOOLS_MERGE_LANES_BAMINPUT {
372+
tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" }
373+
ext.args = { params.run_fastq_sharding ? "-c -p" : "" }
374+
ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" }
382375
publishDir = [
383376
enabled: false
384377
]
385378
}
386379

380+
withName: SAMTOOLS_SORT_MERGED_LANES_BAMINPUT {
381+
tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" }
382+
ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_sorted" }
383+
publishDir = [
384+
// data
385+
path: { "${params.outdir}/mapping/${params.mapping_tool}/data/" },
386+
mode: params.publish_dir_mode,
387+
pattern: '*.{bam}'
388+
]
389+
}
390+
391+
withName: SAMTOOLS_INDEX_MERGED_LANES_BAMINPUT {
392+
tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" }
393+
ext.args = { params.fasta_largeref ? "-c" : "" }
394+
ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" }
395+
publishDir = [
396+
// data
397+
path: { "${params.outdir}/mapping/${params.mapping_tool}/data/" },
398+
mode: params.publish_dir_mode,
399+
pattern: '*.{bai,csi}'
400+
]
401+
}
402+
403+
withName: SAMTOOLS_FLAGSTAT_MERGED_LANES_BAMINPUT {
404+
tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" }
405+
ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_sorted" }
406+
publishDir = [
407+
// stats
408+
path: { "${params.outdir}/mapping/bam_input/stats/" },
409+
mode: params.publish_dir_mode,
410+
pattern: '*.flagstat'
411+
]
412+
}
413+
387414
//
388415
// BAM FILTERING
389416
//
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
//
2+
// Merge lanes of input BAMs, then sort, index and run flagstat on the result
3+
//
4+
5+
include { SAMTOOLS_MERGE as SAMTOOLS_MERGE_LANES_BAMINPUT } from '../../modules/nf-core/samtools/merge/main'
6+
include { SAMTOOLS_SORT as SAMTOOLS_SORT_MERGED_LANES_BAMINPUT } from '../../modules/nf-core/samtools/sort/main'
7+
include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_MERGED_LANES_BAMINPUT } from '../../modules/nf-core/samtools/index/main'
8+
include { SAMTOOLS_FLAGSTAT as SAMTOOLS_FLAGSTAT_MERGED_LANES_BAMINPUT } from '../../modules/nf-core/samtools/flagstat/main'
9+
10+
workflow MERGE_LANES_INPUTBAM {
11+
take:
12+
bams // [ [meta], [bams] ]
13+
14+
main:
15+
ch_versions = Channel.empty()
16+
ch_multiqc_files = Channel.empty()
17+
18+
ch_input_for_lane_merge = bams
19+
.map { meta, bam -> [ meta.clone().findAll{ it.key !in ['lane', 'colour_chemistry', 'shard_number'] }, bam ] }
20+
.groupTuple()
21+
.branch {
22+
meta, bam ->
23+
merge: bam.size() > 1
24+
skip: true
25+
}
26+
27+
SAMTOOLS_MERGE_LANES_BAMINPUT ( ch_input_for_lane_merge.merge, [[], []], [[], []] )
28+
ch_versions = ch_versions.mix( SAMTOOLS_MERGE_LANES_BAMINPUT.out.versions ) // mix() returns a new channel; reassign so the versions are kept
29+
30+
// Then mix back merged and single lane libraries for everything downstream
31+
ch_mergedlanes_for_sorting = ch_input_for_lane_merge.skip
32+
.mix( SAMTOOLS_MERGE_LANES_BAMINPUT.out.bam )
33+
34+
SAMTOOLS_SORT_MERGED_LANES_BAMINPUT ( ch_mergedlanes_for_sorting )
35+
ch_mapped_bam = SAMTOOLS_SORT_MERGED_LANES_BAMINPUT.out.bam
36+
ch_versions = ch_versions.mix( SAMTOOLS_SORT_MERGED_LANES_BAMINPUT.out.versions ) // mix() returns a new channel; reassign so the versions are kept
37+
38+
SAMTOOLS_INDEX_MERGED_LANES_BAMINPUT( ch_mapped_bam )
39+
ch_mapped_bai = params.fasta_largeref ? SAMTOOLS_INDEX_MERGED_LANES_BAMINPUT.out.csi : SAMTOOLS_INDEX_MERGED_LANES_BAMINPUT.out.bai
40+
ch_versions = ch_versions.mix( SAMTOOLS_INDEX_MERGED_LANES_BAMINPUT.out.versions ) // mix() returns a new channel; reassign so the versions are kept
41+
42+
ch_input_for_flagstat = ch_mapped_bam.join( ch_mapped_bai, failOnMismatch: true ) // ch_mapped_bai is the .csi output when params.fasta_largeref is set, the .bai output otherwise
43+
44+
SAMTOOLS_FLAGSTAT_MERGED_LANES_BAMINPUT( ch_input_for_flagstat )
45+
ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT_MERGED_LANES_BAMINPUT.out.versions.first() ) // mix() returns a new channel; reassign so the versions are kept
46+
ch_multiqc_files = ch_multiqc_files.mix(SAMTOOLS_FLAGSTAT_MERGED_LANES_BAMINPUT.out.flagstat )
47+
48+
emit:
49+
bam = ch_mapped_bam // [ [ meta ], bam ]
50+
bai = ch_mapped_bai // [ [ meta ], bai/csi ]
51+
flagstat = SAMTOOLS_FLAGSTAT_MERGED_LANES_BAMINPUT.out.flagstat // [ [ meta ], stats ]
52+
mqc = ch_multiqc_files
53+
versions = ch_versions
54+
55+
}

workflows/eager.nf

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ include { addNewMetaFromAttributes } from '../subwork
2424
include { REFERENCE_INDEXING } from '../subworkflows/local/reference_indexing'
2525
include { PREPROCESSING } from '../subworkflows/local/preprocessing'
2626
include { MAP } from '../subworkflows/local/map'
27+
include { MERGE_LANES_INPUTBAM } from '../subworkflows/local/merge_lanes_inputbam'
2728
include { FILTER_BAM } from '../subworkflows/local/bamfiltering.nf'
2829
include { DEDUPLICATE } from '../subworkflows/local/deduplicate'
2930
include { MANIPULATE_DAMAGE } from '../subworkflows/local/manipulate_damage'
@@ -56,7 +57,6 @@ include { FALCO } from '../modules
5657
include { MTNUCRATIO } from '../modules/nf-core/mtnucratio/main'
5758
include { HOST_REMOVAL } from '../modules/local/host_removal'
5859
include { ENDORSPY } from '../modules/nf-core/endorspy/main'
59-
include { SAMTOOLS_FLAGSTAT as SAMTOOLS_FLAGSTATS_BAM_INPUT } from '../modules/nf-core/samtools/flagstat/main'
6060
include { BEDTOOLS_COVERAGE as BEDTOOLS_COVERAGE_DEPTH } from '../modules/nf-core/bedtools/coverage/main'
6161
include { BEDTOOLS_COVERAGE as BEDTOOLS_COVERAGE_BREADTH } from '../modules/nf-core/bedtools/coverage/main'
6262
include { SAMTOOLS_VIEW_GENOME } from '../modules/local/samtools_view_genome.nf'
@@ -199,31 +199,31 @@ workflow EAGER {
199199
ch_bams_from_input = ch_samplesheet_bams // pass [ meta, bam ] only: MERGE_LANES_INPUTBAM destructures two elements and indexes the BAMs itself
200200
}
201201
else {
202-
ch_bams_from_input = ch_samplesheet_bams.join(SAMTOOLS_INDEX_BAM_INPUT.out.bai)
202+
ch_bams_from_input = ch_samplesheet_bams
203203
}
204204

205-
//
206-
// MODULE: flagstats of user supplied input BAMs
207-
//
208-
SAMTOOLS_FLAGSTATS_BAM_INPUT(ch_bams_from_input)
209-
ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTATS_BAM_INPUT.out.versions)
210-
ch_flagstat_input_bam = SAMTOOLS_FLAGSTATS_BAM_INPUT.out.flagstat
211-
}
212-
else {
213-
ch_bams_from_input = Channel.empty()
214-
ch_flagstat_input_bam = Channel.empty()
205+
// SUBWORKFLOW: Merging lanes for ch_bams_from_input
206+
207+
MERGE_LANES_INPUTBAM(ch_bams_from_input)
208+
ch_bams_from_input_lanemerged = MERGE_LANES_INPUTBAM.out.bam
209+
.join(MERGE_LANES_INPUTBAM.out.bai)
210+
ch_flagstat_bams_from_input_lanemerged = MERGE_LANES_INPUTBAM.out.flagstat
211+
212+
} else {
213+
ch_bams_from_input_lanemerged = Channel.empty()
214+
ch_flagstat_bams_from_input_lanemerged = Channel.empty()
215215
}
216216

217+
217218
//
218219
// SUBWORKFLOW: bam filtering (length, mapped/unmapped, quality etc.)
219220
//
220221

221222
if (params.run_bamfiltering || params.run_metagenomics) {
222223

223224
ch_mapped_for_bamfilter = MAP.out.bam
224-
.join(MAP.out.bai)
225-
.mix(ch_bams_from_input)
226-
225+
.join(MAP.out.bai)
226+
.mix(ch_bams_from_input_lanemerged)
227227
FILTER_BAM(ch_mapped_for_bamfilter)
228228
ch_bamfiltered_for_deduplication = FILTER_BAM.out.genomics
229229
ch_bamfiltered_for_metagenomics = FILTER_BAM.out.metagenomics
@@ -232,8 +232,8 @@ workflow EAGER {
232232
}
233233
else {
234234
ch_bamfiltered_for_deduplication = MAP.out.bam
235-
.join(MAP.out.bai)
236-
.mix(ch_bams_from_input)
235+
.join(MAP.out.bai)
236+
.mix(ch_bams_from_input_lanemerged)
237237
}
238238

239239
ch_reads_for_deduplication = ch_bamfiltered_for_deduplication
@@ -391,7 +391,8 @@ workflow EAGER {
391391
// MODULE: ENDORSPY (raw, filtered, deduplicated)
392392
//
393393

394-
ch_flagstat_for_endorspy_raw = MAP.out.flagstat.mix(ch_flagstat_input_bam)
394+
ch_flagstat_for_endorspy_raw = MAP.out.flagstat
395+
.mix( ch_flagstat_bams_from_input_lanemerged )
395396

396397
if (params.run_bamfiltering && !params.skip_deduplication) {
397398
ch_for_endorspy = ch_flagstat_for_endorspy_raw

0 commit comments

Comments
 (0)