-
Notifications
You must be signed in to change notification settings - Fork 88
Expand file tree
/
Copy pathdeduplicate.nf
More file actions
160 lines (136 loc) · 6.05 KB
/
deduplicate.nf
File metadata and controls
160 lines (136 loc) · 6.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
//
// Carry out per-chromosome deduplication
//
include { addNewMetaFromAttributes } from '../../subworkflows/local/utils_nfcore_eager_pipeline/main'
include { BUILD_INTERVALS } from '../../modules/local/build_intervals'
include { BAM_SPLIT_BY_REGION } from '../../subworkflows/nf-core/bam_split_by_region/main'
include { PICARD_MARKDUPLICATES } from '../../modules/nf-core/picard/markduplicates/main'
include { DEDUP } from '../../modules/nf-core/dedup/main'
include { SAMTOOLS_MERGE as SAMTOOLS_MERGE_DEDUPPED } from '../../modules/nf-core/samtools/merge/main'
include { SAMTOOLS_SORT as SAMTOOLS_SORT_DEDUPPED } from '../../modules/nf-core/samtools/sort/main'
include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_DEDUPPED } from '../../modules/nf-core/samtools/index/main'
include { SAMTOOLS_FLAGSTAT as SAMTOOLS_FLAGSTAT_DEDUPPED } from '../../modules/nf-core/samtools/flagstat/main'
//
// DEDUPLICATE: split each input BAM by genomic region, remove duplicates per region with the
// tool selected by params.deduplication_tool, then merge, sort, index and flagstat the result.
//
// take:
//   ch_bam_bai : [ [ meta ], bam , bai ]
//   fasta      : [ [ meta ], fasta ]
//   fasta_fai  : [ [ meta ], fasta_fai ]
//
// emit:
//   bam      : sorted, deduplicated bam (output of SAMTOOLS_SORT_DEDUPPED)
//   bai      : index of the deduplicated bam (csi when params.fasta_largeref is set, bai otherwise)
//   flagstat : samtools flagstat output for the deduplicated bam
//   versions : mixed versions channels of the tools run here
//   mqc      : files forwarded to MultiQC (the flagstat output)
//
// Fails with an explicit error when params.deduplication_tool is neither 'markduplicates' nor 'dedup'.
//
workflow DEDUPLICATE {
    take:
    ch_bam_bai // [ [ meta ], bam , bai ]
    fasta      // [ [ meta ], fasta ]
    fasta_fai  // [ [ meta ], fasta_fai ]

    main:
    ch_versions      = Channel.empty()
    ch_multiqc_files = Channel.empty()

    ch_refs = fasta.join(fasta_fai)
        .map {
            // Prepend a new meta that contains the meta.id value as the new_meta.reference attribute
            addNewMetaFromAttributes( it, "id" , "reference" , false )
        }

    // Create genomic regions file for splitting the bam before deduplication
    BUILD_INTERVALS( fasta_fai )
    ch_versions = ch_versions.mix( BUILD_INTERVALS.out.versions.first() )

    // Prep regions for combining
    ch_intervals_for_join = BUILD_INTERVALS.out.bed
        .map {
            // Replace meta with new meta that contains the meta.id value in the meta.reference attribute only
            addNewMetaFromAttributes( it, "id" , "reference" , true )
        }

    // Ensure input bam matches the regions file
    ch_bam_for_split = ch_bam_bai
        .map {
            // Prepend a new meta that contains the meta.reference value as the new_meta.reference attribute
            addNewMetaFromAttributes( it, "reference" , "reference" , false )
        }
        .combine( ch_intervals_for_join, by: 0 )
        .map {
            // Drop the leading join key once the regions file has been attached
            ignore_me, meta, bam, bai, regions ->
                [ meta, bam, bai, regions ]
        }

    // Split input bam by region
    BAM_SPLIT_BY_REGION( ch_bam_for_split )
    ch_versions = ch_versions.mix( BAM_SPLIT_BY_REGION.out.versions )

    if ( params.deduplication_tool == 'markduplicates' ) {
        ch_markduplicates_input = BAM_SPLIT_BY_REGION.out.bam_bai
            .map {
                // Prepend a new meta that contains the meta.reference value as the new_meta.reference attribute
                addNewMetaFromAttributes( it, "reference" , "reference" , false )
            }
            .combine( ch_refs, by: 0 )
            .multiMap {
                ignore_me, meta, bam, bai, meta2, fasta_, fasta_fai_ ->
                    bam:       [ meta, bam ]
                    fasta:     [ meta2, fasta_ ]
                    fasta_fai: [ meta2, fasta_fai_ ]
            }

        // Dedup each bam
        PICARD_MARKDUPLICATES(
            ch_markduplicates_input.bam,
            ch_markduplicates_input.fasta,
            ch_markduplicates_input.fasta_fai
        )
        ch_versions            = ch_versions.mix( PICARD_MARKDUPLICATES.out.versions.first() )
        ch_dedupped_region_bam = PICARD_MARKDUPLICATES.out.bam

    } else if ( params.deduplication_tool == "dedup" ) {
        // DEDUP is given only the bam; the index is dropped here
        ch_dedup_input = BAM_SPLIT_BY_REGION.out.bam_bai
            .map {
                meta, bam, bai ->
                    [ meta, bam ]
            }

        DEDUP( ch_dedup_input )
        ch_versions            = ch_versions.mix( DEDUP.out.versions.first() )
        ch_dedupped_region_bam = DEDUP.out.bam

    } else {
        // Without this branch ch_dedupped_region_bam would be left unassigned and the workflow
        // would fail further down without naming the offending parameter.
        error( "Unsupported value for params.deduplication_tool: '${params.deduplication_tool}'. Expected 'markduplicates' or 'dedup'." )
    }

    ch_input_for_samtools_merge = ch_dedupped_region_bam
        .map {
            meta, bam ->
                // Remove the per-region key so that all regions of the same input share one meta and group together
                def meta2 = meta.clone().findAll{ it.key != 'genomic_region' }
                [ meta2, bam ]
        }
        .groupTuple()
        .map {
            // Prepend a new meta that contains the meta.reference value as the new_meta.reference attribute
            addNewMetaFromAttributes( it, "reference" , "reference" , false )
        }
        .combine( ch_refs, by: 0 )
        .multiMap {
            // bam here is a list of bams
            ignore_me, meta, bam, meta2, fasta_, fasta_fai_ ->
                bam:       [ meta, bam ]
                fasta:     [ meta2, fasta_ ]
                fasta_fai: [ meta2, fasta_fai_ ]
        }

    // Merge the bams for each region into one bam
    SAMTOOLS_MERGE_DEDUPPED(
        ch_input_for_samtools_merge.bam,
        ch_input_for_samtools_merge.fasta,
        ch_input_for_samtools_merge.fasta_fai
    )
    ch_versions = ch_versions.mix( SAMTOOLS_MERGE_DEDUPPED.out.versions )

    // Sort the merged bam and index
    SAMTOOLS_SORT_DEDUPPED ( SAMTOOLS_MERGE_DEDUPPED.out.bam )
    ch_versions  = ch_versions.mix( SAMTOOLS_SORT_DEDUPPED.out.versions )
    ch_dedup_bam = SAMTOOLS_SORT_DEDUPPED.out.bam

    SAMTOOLS_INDEX_DEDUPPED ( ch_dedup_bam )
    ch_versions  = ch_versions.mix( SAMTOOLS_INDEX_DEDUPPED.out.versions )
    // Use the csi output when params.fasta_largeref is set, the bai output otherwise
    ch_dedup_bai = params.fasta_largeref ? SAMTOOLS_INDEX_DEDUPPED.out.csi : SAMTOOLS_INDEX_DEDUPPED.out.bai

    // Finally run flagstat on the dedupped bam
    ch_input_for_samtools_flagstat = ch_dedup_bam.join( ch_dedup_bai )

    SAMTOOLS_FLAGSTAT_DEDUPPED(
        ch_input_for_samtools_flagstat
    )
    ch_versions       = ch_versions.mix( SAMTOOLS_FLAGSTAT_DEDUPPED.out.versions )
    ch_multiqc_files  = ch_multiqc_files.mix( SAMTOOLS_FLAGSTAT_DEDUPPED.out.flagstat )
    ch_dedup_flagstat = SAMTOOLS_FLAGSTAT_DEDUPPED.out.flagstat

    emit:
    bam      = ch_dedup_bam
    bai      = ch_dedup_bai
    flagstat = ch_dedup_flagstat
    versions = ch_versions
    mqc      = ch_multiqc_files
}