@@ -29,39 +29,49 @@ workflow DEDUPLICATE {
2929 addNewMetaFromAttributes( it, " id" , " reference" , false )
3030 }
3131
32- // Create genomic regions file for splitting the bam before deduplication
33- BUILD_INTERVALS ( fasta_fai )
34- ch_versions = ch_versions. mix( BUILD_INTERVALS . out. versions. first() )
32+ if ( params. deduplication_skipregionsplit ) {
3533
36- // Prep regions for combining
37- ch_intervals_for_join = BUILD_INTERVALS . out. bed
38- .map {
39- // Replace meta with new meta that contains the meta.id value in the meta.reference attribute only
40- addNewMetaFromAttributes( it, " id" , " reference" , true )
41- }
34+ // No splitting of .bam files by contig, deduplicate all in one
35+ input_for_deduplication = ch_bam_bai
4236
43- // Ensure input bam matches the regions file
44- ch_bam_for_split = ch_bam_bai
45- .map {
46- // Prepend a new meta that contains the meta.reference value as the new_meta.reference attribute
47- addNewMetaFromAttributes( it, " reference" , " reference" , false )
48- }
49- .combine(
50- by : 0 ,
51- ch_intervals_for_join
52- )
37+ } else {
38+
39+ // Create genomic regions file for splitting the bam before deduplication
40+ BUILD_INTERVALS ( fasta_fai )
41+ ch_versions = ch_versions. mix( BUILD_INTERVALS . out. versions. first() )
42+
43+ // Prep regions for combining
44+ ch_intervals_for_join = BUILD_INTERVALS . out. bed
5345 .map {
54- ignore_me, meta, bam, bai, regions ->
55- [ meta, bam, bai, regions ]
46+ // Replace meta with new meta that contains the meta.id value in the meta.reference attribute only
47+ addNewMetaFromAttributes( it, " id" , " reference" , true )
5648 }
5749
58- // Split input bam by region
59- BAM_SPLIT_BY_REGION ( ch_bam_for_split )
60- ch_versions = ch_versions. mix( BAM_SPLIT_BY_REGION . out. versions )
50+ // Ensure input bam matches the regions file
51+ ch_bam_for_split = ch_bam_bai
52+ .map {
53+ // Prepend a new meta that contains the meta.reference value as the new_meta.reference attribute
54+ addNewMetaFromAttributes( it, " reference" , " reference" , false )
55+ }
56+ .combine(
57+ by : 0 ,
58+ ch_intervals_for_join
59+ )
60+ .map {
61+ ignore_me, meta, bam, bai, regions ->
62+ [ meta, bam, bai, regions ]
63+ }
64+
65+ // Split input bam by region
66+ BAM_SPLIT_BY_REGION ( ch_bam_for_split )
67+ input_for_deduplication = BAM_SPLIT_BY_REGION . out. bam_bai
68+ ch_versions = ch_versions. mix( BAM_SPLIT_BY_REGION . out. versions )
69+
70+ }
6171
6272 if ( params. deduplication_tool == ' markduplicates' ) {
6373
64- ch_markduplicates_input = BAM_SPLIT_BY_REGION . out . bam_bai
74+ ch_markduplicates_input = input_for_deduplication
6575 .map {
6676 // Prepend a new meta that contains the meta.reference value as the new_meta.reference attribute
6777 addNewMetaFromAttributes( it, " reference" , " reference" , false )
@@ -83,70 +93,83 @@ workflow DEDUPLICATE {
8393 ch_markduplicates_input. fasta,
8494 ch_markduplicates_input. fasta_fai
8595 )
86- ch_versions = ch_versions. mix( PICARD_MARKDUPLICATES . out. versions. first() )
96+ ch_versions = ch_versions. mix( PICARD_MARKDUPLICATES . out. versions. first() )
8797
88- ch_dedupped_region_bam = PICARD_MARKDUPLICATES . out. bam
98+ ch_dedupped_bam = PICARD_MARKDUPLICATES . out. bam
8999
90100 } else if ( params. deduplication_tool == " dedup" ) {
91- ch_dedup_input = BAM_SPLIT_BY_REGION . out . bam_bai
101+ ch_dedup_input = input_for_deduplication
92102 .map {
93103 meta, bam, bai ->
94104 [ meta, bam ]
95105 }
96106
97107 DEDUP ( ch_dedup_input )
98- ch_versions = ch_versions. mix( DEDUP . out. versions. first() )
108+ ch_versions = ch_versions. mix( DEDUP . out. versions. first() )
99109
100- ch_dedupped_region_bam = DEDUP . out. bam
110+ ch_dedupped_bam = DEDUP . out. bam
101111 }
102112
103- ch_input_for_samtools_merge = ch_dedupped_region_bam
104- .map {
105- meta, bam ->
106- meta2 = meta. clone(). findAll{ it. key != ' genomic_region' }
107- [ meta2, bam ]
108- }
109- .groupTuple()
110- .map {
111- // Prepend a new meta that contains the meta.reference value as the new_meta.reference attribute
112- addNewMetaFromAttributes( it, " reference" , " reference" , false )
113- }
114- .combine(
115- by :0 ,
116- ch_refs
113+ if ( params. deduplication_skipregionsplit ) {
114+
115+ // Bams were never split by region, so bypass re-merging
116+ ch_input_for_samtools_sort_dedupped = ch_dedupped_bam
117+
118+ } else {
119+
120+ // Re-merging of bams-by-contig must take place after deduplication
121+ ch_input_for_samtools_merge = ch_dedupped_bam
122+ .map {
123+ meta, bam ->
124+ meta2 = meta. clone(). findAll{ it. key != ' genomic_region' }
125+ [ meta2, bam ]
126+ }
127+ .groupTuple()
128+ .map {
129+ // Prepend a new meta that contains the meta.reference value as the new_meta.reference attribute
130+ addNewMetaFromAttributes( it, " reference" , " reference" , false )
131+ }
132+ .combine(
133+ by :0 ,
134+ ch_refs
135+ )
136+ .multiMap{
137+ // bam here is a list of bams
138+ ignore_me, meta, bam, meta2, fasta, fasta_fai ->
139+ bam : [ meta, bam ]
140+ fasta : [ meta2, fasta ]
141+ fasta_fai : [ meta2, fasta_fai ]
142+ }
143+
144+ // Merge the bams for each region into one bam
145+ SAMTOOLS_MERGE_DEDUPPED (
146+ ch_input_for_samtools_merge. bam,
147+ ch_input_for_samtools_merge. fasta,
148+ ch_input_for_samtools_merge. fasta_fai
117149 )
118- .multiMap{
119- // bam here is a list of bams
120- ignore_me, meta, bam, meta2, fasta, fasta_fai ->
121- bam : [ meta, bam ]
122- fasta : [ meta2, fasta ]
123- fasta_fai : [ meta2, fasta_fai ]
124- }
150+ ch_versions = ch_versions. mix( SAMTOOLS_MERGE_DEDUPPED . out. versions )
125151
126- // Merge the bams for each region into one bam
127- SAMTOOLS_MERGE_DEDUPPED (
128- ch_input_for_samtools_merge. bam,
129- ch_input_for_samtools_merge. fasta,
130- ch_input_for_samtools_merge. fasta_fai
131- )
132- ch_versions = ch_versions. mix( SAMTOOLS_MERGE_DEDUPPED . out. versions )
152+ ch_input_for_samtools_sort_dedupped = SAMTOOLS_MERGE_DEDUPPED . out. bam
153+
154+ }
133155
134156
135157 // Sort the merged bam and index
136- SAMTOOLS_SORT_DEDUPPED ( SAMTOOLS_MERGE_DEDUPPED . out . bam )
158+ SAMTOOLS_SORT_DEDUPPED ( ch_input_for_samtools_sort_dedupped )
137159 ch_versions = ch_versions. mix( SAMTOOLS_SORT_DEDUPPED . out. versions )
138160 ch_dedup_bam = SAMTOOLS_SORT_DEDUPPED . out. bam
139161
140162 SAMTOOLS_INDEX_DEDUPPED ( ch_dedup_bam )
141163 ch_versions = ch_versions. mix( SAMTOOLS_INDEX_DEDUPPED . out. versions )
142- ch_dedup_bai = params. fasta_largeref ? SAMTOOLS_INDEX_DEDUPPED . out. csi : SAMTOOLS_INDEX_DEDUPPED . out. bai
164+ ch_dedup_bai = params. fasta_largeref ? SAMTOOLS_INDEX_DEDUPPED . out. csi : SAMTOOLS_INDEX_DEDUPPED . out. bai
143165
144166 // Finally run flagstat on the dedupped bam
145167 ch_input_for_samtools_flagstat = ch_dedup_bam. join( ch_dedup_bai )
146168
147169 SAMTOOLS_FLAGSTAT_DEDUPPED (
148170 ch_input_for_samtools_flagstat
149171 )
172+
150173 ch_versions = ch_versions. mix( SAMTOOLS_FLAGSTAT_DEDUPPED . out. versions )
151174 ch_multiqc_files = ch_multiqc_files. mix( SAMTOOLS_FLAGSTAT_DEDUPPED . out. flagstat )
152175 ch_dedup_flagstat = SAMTOOLS_FLAGSTAT_DEDUPPED . out. flagstat
0 commit comments