Skip to content
This repository was archived by the owner on Jan 27, 2020. It is now read-only.

Commit 3936cca

Browse files
author
Szilveszter Juhos
authored
Merge pull request #602 from MaxUlysse/CompressAnnVCFs
Cool, https://goo.gl/Yc3rkv
2 parents 1b4016f + 65fe1b2 commit 3936cca

4 files changed

Lines changed: 48 additions & 39 deletions

File tree

annotate.nf

Lines changed: 39 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -75,17 +75,17 @@ vcfNotToAnnotate = Channel.create()
7575
if (annotateVCF == []) {
7676
Channel.empty().mix(
7777
Channel.fromPath("${directoryMap.haplotypecaller}/*.vcf.gz")
78-
.flatten().map{vcf -> ['none', 'haplotypecaller', vcf, null]},
78+
.flatten().map{vcf -> ['haplotypecaller', vcf]},
7979
Channel.fromPath("${directoryMap.manta}/*SV.vcf.gz")
80-
.flatten().map{vcf -> ['none', 'manta', vcf, null]},
80+
.flatten().map{vcf -> ['manta', vcf]},
8181
Channel.fromPath("${directoryMap.mutect1}/*.vcf.gz")
82-
.flatten().map{vcf -> ['none', 'mutect1', vcf, null]},
82+
.flatten().map{vcf -> ['mutect1', vcf]},
8383
Channel.fromPath("${directoryMap.mutect2}/*.vcf.gz")
84-
.flatten().map{vcf -> ['none', 'mutect2', vcf, null]},
84+
.flatten().map{vcf -> ['mutect2', vcf]},
8585
Channel.fromPath("${directoryMap.strelka}/*{somatic,variants}*.vcf.gz")
86-
.flatten().map{vcf -> ['none', 'strelka', vcf, null]},
86+
.flatten().map{vcf -> ['strelka', vcf]},
8787
Channel.fromPath("${directoryMap.strelkabp}/*{somatic,variants}*.vcf.gz")
88-
.flatten().map{vcf -> ['none', 'strelkabp', vcf, null]}
88+
.flatten().map{vcf -> ['strelkabp', vcf]}
8989
).choice(vcfToAnnotate, vcfNotToAnnotate) {
9090
annotateTools == [] || (annotateTools != [] && it[0] in annotateTools) ? 0 : 1
9191
}
@@ -94,25 +94,30 @@ if (annotateVCF == []) {
9494
annotateVCF.each{ list += ",${it}" }
9595
list = list.substring(1)
9696
if (StringUtils.countMatches("${list}", ",") == 0) vcfToAnnotate = Channel.fromPath("${list}")
97-
.map{vcf -> ['none', 'userspecified', vcf, null]}
97+
.map{vcf -> ['userspecified', vcf]}
9898
else vcfToAnnotate = Channel.fromPath("{$list}")
99-
.map{vcf -> ['none', 'userspecified', vcf, null]}
99+
.map{vcf -> ['userspecified', vcf]}
100100
} else exit 1, "specify only tools or files to annotate, not both"
101101

102102
vcfNotToAnnotate.close()
103103

104104
(vcfForBCFtools, vcfForVCFtools, vcfForSnpeff, vcfForVep) = vcfToAnnotate.into(4)
105105

106+
vcfForVep = vcfForVep.map {
107+
variantCaller, vcf ->
108+
["vep", variantCaller, vcf, null]
109+
}
110+
106111
process RunBcftoolsStats {
107112
tag {vcf}
108113

109114
publishDir directoryMap.bcftoolsStats, mode: 'link'
110115

111116
input:
112-
set annotator, variantCaller, file(vcf), file(idx) from vcfForBCFtools
117+
set variantCaller, file(vcf) from vcfForBCFtools
113118

114119
output:
115-
file ("${vcf.baseName}.bcf.tools.stats.out") into bcfReport
120+
file ("*.bcf.tools.stats.out") into bcfReport
116121

117122
when: !params.noReports
118123

@@ -130,10 +135,10 @@ process RunVcftools {
130135
publishDir directoryMap.vcftools, mode: 'link'
131136

132137
input:
133-
set annotator, variantCaller, file(vcf), file(idx) from vcfForVCFtools
138+
set variantCaller, file(vcf) from vcfForVCFtools
134139

135140
output:
136-
file ("${vcf.baseName}.*") into vcfReport
141+
file ("${vcf.simpleName}.*") into vcfReport
137142

138143
when: !params.noReports
139144

@@ -146,21 +151,21 @@ if (params.verbose) vcfReport = vcfReport.view {
146151
}
147152

148153
process RunSnpeff {
149-
tag {vcf}
154+
tag {"${variantCaller} - ${vcf}"}
150155

151156
publishDir params.outDir, mode: 'link', saveAs: {
152-
if (it == "${vcf.baseName}.snpEff.csv") "${directoryMap.snpeffReports}/${it}"
153-
else if (it == "${vcf.baseName}.snpEff.ann.vcf") null
157+
if (it == "${vcf.simpleName}_snpEff.csv") "${directoryMap.snpeffReports}/${it}"
158+
else if (it == "${vcf.simpleName}_snpEff.ann.vcf") null
154159
else "${directoryMap.snpeff}/${it}"
155160
}
156161

157162
input:
158-
set annotator, variantCaller, file(vcf), file(idx) from vcfForSnpeff
163+
set variantCaller, file(vcf) from vcfForSnpeff
159164
val snpeffDb from Channel.value(params.genomes[params.genome].snpeffDb)
160165

161166
output:
162-
set file("${vcf.baseName}.snpEff.genes.txt"), file("${vcf.baseName}.snpEff.csv"), file("${vcf.baseName}.snpEff.summary.html") into snpeffOutput
163-
set val("snpeff"), variantCaller, file("${vcf.baseName}.snpEff.ann.vcf") into snpeffVCF
167+
set file("${vcf.simpleName}_snpEff.genes.txt"), file("${vcf.simpleName}_snpEff.csv"), file("${vcf.simpleName}_snpEff.summary.html") into snpeffOutput
168+
set val("snpeff"), variantCaller, file("${vcf.simpleName}_snpEff.ann.vcf") into snpeffVCF
164169

165170
when: 'snpeff' in tools || 'merge' in tools
166171

@@ -169,14 +174,14 @@ process RunSnpeff {
169174
java -Xmx${task.memory.toGiga()}g \
170175
-jar \$SNPEFF_HOME/snpEff.jar \
171176
${snpeffDb} \
172-
-csvStats ${vcf.baseName}.snpEff.csv \
177+
-csvStats ${vcf.simpleName}_snpEff.csv \
173178
-nodownload \
174179
-canon \
175180
-v \
176181
${vcf} \
177-
> ${vcf.baseName}.snpEff.ann.vcf
182+
> ${vcf.simpleName}_snpEff.ann.vcf
178183
179-
mv snpEff_summary.html ${vcf.baseName}.snpEff.summary.html
184+
mv snpEff_summary.html ${vcf.simpleName}_snpEff.summary.html
180185
"""
181186
}
182187

@@ -194,41 +199,44 @@ if('merge' in tools) {
194199
vcfCompressed = Channel.create()
195200

196201
vcfForVep = Channel.empty().mix(
197-
vcfCompressed.until({it[0]!="snpeff"})
202+
vcfCompressed.until({ it[0]=="merge" })
198203
)
199204
}
200205

201206
process RunVEP {
202-
tag {vcf}
207+
tag {"${variantCaller} - ${vcf}"}
203208

204209
publishDir params.outDir, mode: 'link', saveAs: {
205-
if (it == "${vcf.baseName}.vep.summary.html") "${directoryMap.vep}/${it}"
210+
if (it == "${vcf.simpleName}_VEP.summary.html") "${directoryMap.vep}/${it}"
206211
else null
207212
}
208213

209214
input:
210215
set annotator, variantCaller, file(vcf), file(idx) from vcfForVep
211216

212217
output:
213-
set val("vep"), variantCaller, file("${vcf.baseName}.vep.ann.vcf") into vepVCF
214-
file("${vcf.baseName}.vep.summary.html") into vepReport
218+
set finalannotator, variantCaller, file("${vcf.simpleName}_VEP.ann.vcf") into vepVCF
219+
file("${vcf.simpleName}_VEP.summary.html") into vepReport
215220

216221
when: 'vep' in tools || 'merge' in tools
217222

218223
script:
224+
finalannotator = annotator == "snpeff" ? 'merge' : 'vep'
219225
genome = params.genome == 'smallGRCh37' ? 'GRCh37' : params.genome
220226
"""
221227
vep \
222228
-i ${vcf} \
223-
-o ${vcf.baseName}.vep.ann.vcf \
224-
--stats_file ${vcf.baseName}.vep.summary.html \
229+
-o ${vcf.simpleName}_VEP.ann.vcf \
230+
--assembly ${genome} \
225231
--cache \
232+
--database \
226233
--everything \
227234
--filter_common \
235+
--fork ${task.cpus} \
228236
--format vcf \
229237
--offline \
230238
--per_gene \
231-
--fork ${task.cpus} \
239+
--stats_file ${vcf.simpleName}_VEP.summary.html \
232240
--total_length \
233241
--vcf
234242
"""
@@ -244,7 +252,7 @@ vcfToCompress = snpeffVCF.mix(vepVCF)
244252
process CompressVCF {
245253
tag {"${annotator} - ${vcf}"}
246254

247-
publishDir "${directoryMap."$annotator"}", mode: 'link'
255+
publishDir "${directoryMap."$finalannotator"}", mode: 'link'
248256

249257
input:
250258
set annotator, variantCaller, file(vcf) from vcfToCompress
@@ -253,6 +261,7 @@ process CompressVCF {
253261
set annotator, variantCaller, file("*.vcf.gz"), file("*.vcf.gz.tbi") into (vcfCompressed, vcfCompressedoutput)
254262

255263
script:
264+
finalannotator = annotator == "merge" ? "vep" : annotator
256265
"""
257266
bgzip < ${vcf} > ${vcf}.gz
258267
tabix ${vcf}.gz

germlineVC.nf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -560,7 +560,7 @@ process RunBcftoolsStats {
560560
set variantCaller, file(vcf) from vcfForBCFtools
561561

562562
output:
563-
file ("${vcf.baseName}.bcf.tools.stats.out") into bcfReport
563+
file ("${vcf.simpleName}.bcf.tools.stats.out") into bcfReport
564564

565565
when: !params.noReports
566566

@@ -583,7 +583,7 @@ process RunVcftools {
583583
set variantCaller, file(vcf) from vcfForVCFtools
584584

585585
output:
586-
file ("${vcf.baseName}.*") into vcfReport
586+
file ("${vcf.simpleName}.*") into vcfReport
587587

588588
when: !params.noReports
589589

lib/QC.groovy

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ class QC {
1313
// Run bcftools on vcf file
1414
static def bcftools(vcf) {
1515
"""
16-
bcftools stats ${vcf} > ${vcf.baseName}.bcf.tools.stats.out
16+
bcftools stats ${vcf} > ${vcf.simpleName}.bcf.tools.stats.out
1717
"""
1818
}
1919

@@ -30,22 +30,22 @@ class QC {
3030
vcftools \
3131
--gzvcf ${vcf} \
3232
--relatedness2 \
33-
--out ${vcf.baseName}
33+
--out ${vcf.simpleName}
3434
3535
vcftools \
3636
--gzvcf ${vcf} \
3737
--TsTv-by-count \
38-
--out ${vcf.baseName}
38+
--out ${vcf.simpleName}
3939
4040
vcftools \
4141
--gzvcf ${vcf} \
4242
--TsTv-by-qual \
43-
--out ${vcf.baseName}
43+
--out ${vcf.simpleName}
4444
4545
vcftools \
4646
--gzvcf ${vcf} \
4747
--FILTER-summary \
48-
--out ${vcf.baseName}
48+
--out ${vcf.simpleName}
4949
"""
5050
}
5151

somaticVC.nf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -811,7 +811,7 @@ process RunBcftoolsStats {
811811
set variantCaller, file(vcf) from vcfForBCFtools
812812

813813
output:
814-
file ("${vcf.baseName}.bcf.tools.stats.out") into bcfReport
814+
file ("${vcf.simpleName}.bcf.tools.stats.out") into bcfReport
815815

816816
when: !params.noReports
817817

@@ -834,7 +834,7 @@ process RunVcftools {
834834
set variantCaller, file(vcf) from vcfForVCFtools
835835

836836
output:
837-
file ("${vcf.baseName}.*") into vcfReport
837+
file ("${vcf.simpleName}.*") into vcfReport
838838

839839
when: !params.noReports
840840

0 commit comments

Comments
 (0)