Skip to content

Commit cbadab7

Browse files
authored
Merge pull request nf-core#607 from szilvajuhos/master
GATK4 first round without MuTect1 and indel realignment
2 parents 41e2eb3 + 15adbb6 commit cbadab7

23 files changed

Lines changed: 219 additions & 585 deletions

annotate.nf

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -73,23 +73,23 @@ vcfToAnnotate = Channel.create()
7373
vcfNotToAnnotate = Channel.create()
7474

7575
if (annotateVCF == []) {
76+
// by default we annotate all available VCFs that we can find in the VariantCalling directory
7677
Channel.empty().mix(
7778
Channel.fromPath("${directoryMap.haplotypecaller}/*.vcf.gz")
7879
.flatten().map{vcf -> ['haplotypecaller', vcf]},
7980
Channel.fromPath("${directoryMap.manta}/*SV.vcf.gz")
8081
.flatten().map{vcf -> ['manta', vcf]},
81-
Channel.fromPath("${directoryMap.mutect1}/*.vcf.gz")
82-
.flatten().map{vcf -> ['mutect1', vcf]},
8382
Channel.fromPath("${directoryMap.mutect2}/*.vcf.gz")
8483
.flatten().map{vcf -> ['mutect2', vcf]},
85-
Channel.fromPath("${directoryMap.strelka}/*{somatic,variants}*.vcf.gz")
84+
Channel.fromPath("${directoryMap.strelka}/*{somatic,variants}*.vcf.gz") // Strelka only
8685
.flatten().map{vcf -> ['strelka', vcf]},
87-
Channel.fromPath("${directoryMap.strelkabp}/*{somatic,variants}*.vcf.gz")
86+
Channel.fromPath("${directoryMap.strelkabp}/*{somatic,variants}*.vcf.gz") // Strelka with Manta indel candidates
8887
.flatten().map{vcf -> ['strelkabp', vcf]}
8988
).choice(vcfToAnnotate, vcfNotToAnnotate) {
9089
annotateTools == [] || (annotateTools != [] && it[0] in annotateTools) ? 0 : 1
9190
}
9291
} else if (annotateTools == []) {
92+
// alternatively, annotate user-submitted VCFs
9393
list = ""
9494
annotateVCF.each{ list += ",${it}" }
9595
list = list.substring(1)
@@ -101,6 +101,10 @@ if (annotateVCF == []) {
101101

102102
vcfNotToAnnotate.close()
103103

104+
// as we now have the list of VCFs to annotate, the first step is to annotate them with allele frequencies, if any are available
105+
106+
107+
104108
(vcfForBCFtools, vcfForVCFtools, vcfForSnpeff, vcfForVep) = vcfToAnnotate.into(4)
105109

106110
vcfForVep = vcfForVep.map {
@@ -224,11 +228,12 @@ process RunVEP {
224228
finalannotator = annotator == "snpeff" ? 'merge' : 'vep'
225229
genome = params.genome == 'smallGRCh37' ? 'GRCh37' : params.genome
226230
"""
227-
vep \
231+
vep --dir /opt/vep/.vep/ \
228232
-i ${vcf} \
229233
-o ${vcf.simpleName}_VEP.ann.vcf \
230234
--assembly ${genome} \
231235
--cache \
236+
--cache_version 91 \
232237
--database \
233238
--everything \
234239
--filter_common \
@@ -346,7 +351,6 @@ def helpMessage() {
346351
log.info " Possible values are:"
347352
log.info " haplotypecaller (Annotate HaplotypeCaller output)"
348353
log.info " manta (Annotate Manta output)"
349-
log.info " mutect1 (Annotate MuTect1 output)"
350354
log.info " mutect2 (Annotate MuTect2 output)"
351355
log.info " strelka (Annotate Strelka output)"
352356
log.info " --annotateVCF"

buildContainers.nf

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -177,13 +177,6 @@ def grabRevision() {
177177
def defineContainersList(){
178178
// Return list of authorized containers
179179
return [
180-
'freebayes',
181-
'gatk',
182-
'gatk4',
183-
'igvtools',
184-
'mutect1',
185-
'picard',
186-
'qctools',
187180
'r-base',
188181
'runallelecount',
189182
'sarek',
@@ -208,8 +201,7 @@ def helpMessage() {
208201
log.info " --containers: Choose which containers to build"
209202
log.info " Default: all"
210203
log.info " Possible values:"
211-
log.info " all, freebayes, gatk, gatk4, igvtools, mutect1, picard"
212-
log.info " qctools, r-base, runallelecount, sarek, snpeff"
204+
log.info " all, r-base, runallelecount, sarek, snpeff"
213205
log.info " snpeffgrch37, snpeffgrch38, vepgrch37, vepgrch38"
214206
log.info " --docker: Build containers using Docker"
215207
log.info " --help"

buildReferences.nf

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ kate: syntax groovy; space-indent on; indent-width 2;
2929
- ProcessReference - Download all references if needed
3030
- DecompressFile - Extract files if needed
3131
- BuildBWAindexes - Build indexes for BWA
32-
- BuildPicardIndex - Build index with Picard
32+
- BuildReferenceIndex - Build index for FASTA refs
3333
- BuildSAMToolsIndex - Build index with SAMTools
3434
- BuildVCFIndex - Build index for VCF files
3535
================================================================================
@@ -98,7 +98,7 @@ if (params.verbose) ch_decompressedFiles = ch_decompressedFiles.view {
9898

9999
ch_fastaFile = Channel.create()
100100
ch_fastaForBWA = Channel.create()
101-
ch_fastaForPicard = Channel.create()
101+
ch_fastaReference = Channel.create()
102102
ch_fastaForSAMTools = Channel.create()
103103
ch_otherFile = Channel.create()
104104
ch_vcfFile = Channel.create()
@@ -108,7 +108,7 @@ ch_decompressedFiles
108108
it =~ ".fasta" ? 0 :
109109
it =~ ".vcf" ? 1 : 2}
110110

111-
(ch_fastaForBWA, ch_fastaForPicard, ch_fastaForSAMTools, ch_fastaFileToKeep) = ch_fastaFile.into(4)
111+
(ch_fastaForBWA, ch_fastaReference, ch_fastaForSAMTools, ch_fastaFileToKeep) = ch_fastaFile.into(4)
112112
(ch_vcfFile, ch_vcfFileToKeep) = ch_vcfFile.into(2)
113113

114114
ch_notCompressedfiles
@@ -137,29 +137,28 @@ if (params.verbose) bwaIndexes.flatten().view {
137137
"BWA index : ${it.fileName}"
138138
}
139139

140-
process BuildPicardIndex {
140+
process BuildReferenceIndex {
141141
tag {f_reference}
142142

143143
publishDir params.outDir, mode: 'link'
144144

145145
input:
146-
file(f_reference) from ch_fastaForPicard
146+
file(f_reference) from ch_fastaReference
147147

148148
output:
149-
file("*.dict") into ch_picardIndex
149+
file("*.dict") into ch_referenceIndex
150150

151151
script:
152152
"""
153-
java -Xmx${task.memory.toGiga()}g \
154-
-jar \$PICARD_HOME/picard.jar \
153+
gatk --java-options "-Xmx${task.memory.toGiga()}g" \
155154
CreateSequenceDictionary \
156-
REFERENCE=${f_reference} \
157-
OUTPUT=${f_reference.baseName}.dict
155+
--REFERENCE ${f_reference} \
156+
--OUTPUT ${f_reference.baseName}.dict
158157
"""
159158
}
160159

161-
if (params.verbose) ch_picardIndex.view {
162-
"Picard index : ${it.fileName}"
160+
if (params.verbose) ch_referenceIndex.view {
161+
"Reference index : ${it.fileName}"
163162
}
164163

165164
process BuildSAMToolsIndex {
@@ -196,7 +195,7 @@ process BuildVCFIndex {
196195

197196
script:
198197
"""
199-
\$IGVTOOLS_HOME/igvtools index ${f_reference}
198+
igvtools index ${f_reference}
200199
"""
201200
}
202201

configuration/containers.config

Lines changed: 19 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -9,52 +9,48 @@
99

1010
process {
1111
$BuildBWAindexes.container = "${params.repository}/sarek:${params.tag}"
12-
$BuildPicardIndex.container = "${params.repository}/picard:${params.tag}"
12+
$BuildReferenceIndex.container = "${params.repository}/sarek:${params.tag}"
1313
$BuildSAMToolsIndex.container = "${params.repository}/sarek:${params.tag}"
14-
$BuildVCFIndex.container = "${params.repository}/igvtools:${params.tag}"
14+
$BuildVCFIndex.container = "${params.repository}/sarek:${params.tag}"
1515
$CompressVCF.container = "${params.repository}/sarek:${params.tag}"
1616
$ConcatVCF.container = "${params.repository}/sarek:${params.tag}"
17-
$CreateRecalibrationTable.container = "${params.repository}/gatk:${params.tag}"
18-
$GetVersionAll.container = "${params.repository}/qctools:${params.tag}"
17+
$CreateRecalibrationTable.container = "${params.repository}/sarek:${params.tag}"
18+
$GetVersionAll.container = "${params.repository}/sarek:${params.tag}"
1919
$GetVersionAlleleCount.container = "${params.repository}/runallelecount:${params.tag}"
2020
$GetVersionASCAT.container = "${params.repository}/r-base:${params.tag}"
21-
$GetVersionBamQC.container = "${params.repository}/qctools:${params.tag}"
21+
$GetVersionBamQC.container = "${params.repository}/sarek:${params.tag}"
2222
$GetVersionBCFtools.container = "${params.repository}/sarek:${params.tag}"
2323
$GetVersionBWAsamtools.container = "${params.repository}/sarek:${params.tag}"
24-
$GetVersionFastQC.container = "${params.repository}/qctools:${params.tag}"
25-
$GetVersionFreeBayes.container = "${params.repository}/freebayes:${params.tag}"
26-
$GetVersionGATK.container = "${params.repository}/gatk:${params.tag}"
24+
$GetVersionFastQC.container = "${params.repository}/sarek:${params.tag}"
25+
$GetVersionFreeBayes.container = "${params.repository}/sarek:${params.tag}"
26+
$GetVersionGATK.container = "${params.repository}/sarek:${params.tag}"
2727
$GetVersionManta.container = "${params.repository}/sarek:${params.tag}"
28-
$GetVersionPicard.container = "${params.repository}/picard:${params.tag}"
2928
$GetVersionSnpeff.container = {params.genome == 'GRCh38' ? "${params.repository}/snpeffgrch38:${params.tag}" : "${params.repository}/snpeffgrch37:${params.tag}"}
3029
$GetVersionStrelka.container = "${params.repository}/sarek:${params.tag}"
31-
$GetVersionVCFtools.container = "${params.repository}/qctools:${params.tag}"
30+
$GetVersionVCFtools.container = "${params.repository}/sarek:${params.tag}"
3231
$GetVersionVEP.container = {params.genome == 'GRCh38' ? "${params.repository}/vepgrch38:${params.tag}" : "${params.repository}/vepgrch37:${params.tag}"}
33-
$IndelRealigner.container = "${params.repository}/gatk:${params.tag}"
3432
$MapReads.container = "${params.repository}/sarek:${params.tag}"
35-
$MarkDuplicates.container = "${params.repository}/picard:${params.tag}"
33+
$MarkDuplicates.container = "${params.repository}/sarek:${params.tag}"
3634
$MergeBams.container = "${params.repository}/sarek:${params.tag}"
37-
$RealignerTargetCreator.container = "${params.repository}/gatk:${params.tag}"
38-
$RecalibrateBam.container = "${params.repository}/gatk:${params.tag}"
35+
$RecalibrateBam.container = "${params.repository}/sarek:${params.tag}"
3936
$RunAlleleCount.container = "${params.repository}/runallelecount:${params.tag}"
4037
$RunAscat.container = "${params.repository}/r-base:${params.tag}"
41-
$RunBamQC.container = "${params.repository}/qctools:${params.tag}"
38+
$RunBamQC.container = "${params.repository}/sarek:${params.tag}"
4239
$RunBcftoolsStats.container = "${params.repository}/sarek:${params.tag}"
4340
$RunConvertAlleleCounts.container = "${params.repository}/r-base:${params.tag}"
44-
$RunFastQC.container = "${params.repository}/qctools:${params.tag}"
45-
$RunFreeBayes.container = "${params.repository}/freebayes:${params.tag}"
46-
$RunGenotypeGVCFs.container = "${params.repository}/gatk:${params.tag}"
47-
$RunHaplotypecaller.container = "${params.repository}/gatk:${params.tag}"
41+
$RunFastQC.container = "${params.repository}/sarek:${params.tag}"
42+
$RunFreeBayes.container = "${params.repository}/sarek:${params.tag}"
43+
$RunGenotypeGVCFs.container = "${params.repository}/sarek:${params.tag}"
44+
$RunHaplotypecaller.container = "${params.repository}/sarek:${params.tag}"
4845
$RunManta.container = "${params.repository}/sarek:${params.tag}"
49-
$RunMultiQC.container = "${params.repository}/qctools:${params.tag}"
50-
$RunMutect1.container = "${params.repository}/mutect1:${params.tag}"
51-
$RunMutect2.container = "${params.repository}/gatk:${params.tag}"
46+
$RunMultiQC.container = "${params.repository}/sarek:${params.tag}"
47+
$RunMutect2.container = "${params.repository}/sarek:${params.tag}"
5248
$RunSamtoolsStats.container = "${params.repository}/sarek:${params.tag}"
5349
$RunSingleManta.container = "${params.repository}/sarek:${params.tag}"
5450
$RunSingleStrelka.container = "${params.repository}/sarek:${params.tag}"
5551
$RunSnpeff.container = {params.genome == 'GRCh38' ? "${params.repository}/snpeffgrch38:${params.tag}" : "${params.repository}/snpeffgrch37:${params.tag}"}
5652
$RunStrelka.container = "${params.repository}/sarek:${params.tag}"
5753
$RunStrelkaBP.container = "${params.repository}/sarek:${params.tag}"
58-
$RunVcftools.container = "${params.repository}/qctools:${params.tag}"
54+
$RunVcftools.container = "${params.repository}/sarek:${params.tag}"
5955
$RunVEP.container = {params.genome == 'GRCh38' ? "${params.repository}/vepgrch38:${params.tag}" : "${params.repository}/vepgrch37:${params.tag}"}
6056
}

configuration/genomes.config

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@ params {
4242
knownIndels = "${params.genome_base}/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz"
4343
knownIndelsIndex = "${params.genome_base}/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz.tbi"
4444
snpeffDb = "GRCh38.86"
45+
// This is a nasty-looking list of allele-frequency files. Add/remove files to match your own sets
46+
//AF_files = "${params.genome_base}/{00-All.dbsnp_151.hg38.CAF.TOPMED.alternate.allele.freq,hapmap_3.3_grch38_pop_stratified_af.HMAF,SweGen_hg38_stratified.SWAF}.vcf"
47+
//AF_indexes = "${params.genome_base}/{00-All.dbsnp_151.hg38.CAF.TOPMED.alternate.allele.freq,hapmap_3.3_grch38_pop_stratified_af.HMAF,SweGen_hg38_stratified.SWAF}.vcf.idx"
4548
}
4649
'smallGRCh37' {
4750
acLoci = "${params.genome_base}/1000G_phase3_20130502_SNP_maf0.3.small.loci"

configuration/singularity-path.config

Lines changed: 21 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -14,52 +14,50 @@ singularity {
1414

1515
process {
1616
$BuildBWAindexes.container = "${params.containerPath}/sarek-${params.tag}.img"
17-
$BuildPicardIndex.container = "${params.containerPath}/picard-${params.tag}.img"
17+
$BuildReferenceIndex.container = "${params.containerPath}/sarek-${params.tag}.img"
1818
$BuildSAMToolsIndex.container = "${params.containerPath}/sarek-${params.tag}.img"
19-
$BuildVCFIndex.container = "${params.containerPath}/igvtools-${params.tag}.img"
19+
$BuildVCFIndex.container = "${params.containerPath}/sarek-${params.tag}.img"
2020
$CompressVCF.container = "${params.containerPath}/sarek-${params.tag}.img"
2121
$ConcatVCF.container = "${params.containerPath}/sarek-${params.tag}.img"
22-
$CreateRecalibrationTable.container = "${params.containerPath}/gatk-${params.tag}.img"
23-
$GetVersionAll.container = "${params.containerPath}/qctools-${params.tag}.img"
22+
$CreateRecalibrationTable.container = "${params.containerPath}/sarek-${params.tag}.img"
23+
$GetVersionAll.container = "${params.containerPath}/sarek-${params.tag}.img"
2424
$GetVersionAlleleCount.container = "${params.containerPath}/runallelecount-${params.tag}.img"
2525
$GetVersionASCAT.container = "${params.containerPath}/r-base-${params.tag}.img"
26-
$GetVersionBamQC.container = "${params.containerPath}/qctools-${params.tag}.img"
26+
$GetVersionBamQC.container = "${params.containerPath}/sarek-${params.tag}.img"
2727
$GetVersionBCFtools.container = "${params.containerPath}/sarek-${params.tag}.img"
2828
$GetVersionBWAsamtools.container = "${params.containerPath}/sarek-${params.tag}.img"
29-
$GetVersionFastQC.container = "${params.containerPath}/qctools-${params.tag}.img"
30-
$GetVersionFreeBayes.container = "${params.containerPath}/freebayes-${params.tag}.img"
31-
$GetVersionGATK.container = "${params.containerPath}/gatk-${params.tag}.img"
29+
$GetVersionFastQC.container = "${params.containerPath}/sarek-${params.tag}.img"
30+
$GetVersionFreeBayes.container = "${params.containerPath}/sarek-${params.tag}.img"
31+
$GetVersionGATK.container = "${params.containerPath}/sarek-${params.tag}.img"
3232
$GetVersionManta.container = "${params.containerPath}/sarek-${params.tag}.img"
33-
$GetVersionPicard.container = "${params.containerPath}/picard-${params.tag}.img"
3433
$GetVersionSnpeff.container = {params.genome == 'GRCh38' ? "${params.containerPath}/snpeffgrch38-${params.tag}.img" : "${params.containerPath}/snpeffgrch37-${params.tag}.img"}
3534
$GetVersionStrelka.container = "${params.containerPath}/sarek-${params.tag}.img"
36-
$GetVersionVCFtools.container = "${params.containerPath}/qctools-${params.tag}.img"
35+
$GetVersionVCFtools.container = "${params.containerPath}/sarek-${params.tag}.img"
3736
$GetVersionVEP.container = {params.genome == 'GRCh38' ? "${params.containerPath}/vepgrch38-${params.tag}.img" : "${params.containerPath}/vepgrch37-${params.tag}.img"}
38-
$IndelRealigner.container = "${params.containerPath}/gatk-${params.tag}.img"
37+
$IndelRealigner.container = "${params.containerPath}/sarek-${params.tag}.img"
3938
$MapReads.container = "${params.containerPath}/sarek-${params.tag}.img"
40-
$MarkDuplicates.container = "${params.containerPath}/picard-${params.tag}.img"
39+
$MarkDuplicates.container = "${params.containerPath}/sarek-${params.tag}.img"
4140
$MergeBams.container = "${params.containerPath}/sarek-${params.tag}.img"
42-
$RealignerTargetCreator.container = "${params.containerPath}/gatk-${params.tag}.img"
43-
$RecalibrateBam.container = "${params.containerPath}/gatk-${params.tag}.img"
41+
$RealignerTargetCreator.container = "${params.containerPath}/sarek-${params.tag}.img"
42+
$RecalibrateBam.container = "${params.containerPath}/sarek-${params.tag}.img"
4443
$RunAlleleCount.container = "${params.containerPath}/runallelecount-${params.tag}.img"
4544
$RunAscat.container = "${params.containerPath}/r-base-${params.tag}.img"
46-
$RunBamQC.container = "${params.containerPath}/qctools-${params.tag}.img"
45+
$RunBamQC.container = "${params.containerPath}/sarek-${params.tag}.img"
4746
$RunBcftoolsStats.container = "${params.containerPath}/sarek-${params.tag}.img"
4847
$RunConvertAlleleCounts.container = "${params.containerPath}/r-base-${params.tag}.img"
49-
$RunFastQC.container = "${params.containerPath}/qctools-${params.tag}.img"
50-
$RunFreeBayes.container = "${params.containerPath}/freebayes-${params.tag}.img"
51-
$RunGenotypeGVCFs.container = "${params.containerPath}/gatk-${params.tag}.img"
52-
$RunHaplotypecaller.container = "${params.containerPath}/gatk-${params.tag}.img"
48+
$RunFastQC.container = "${params.containerPath}/sarek-${params.tag}.img"
49+
$RunFreeBayes.container = "${params.containerPath}/sarek-${params.tag}.img"
50+
$RunGenotypeGVCFs.container = "${params.containerPath}/sarek-${params.tag}.img"
51+
$RunHaplotypecaller.container = "${params.containerPath}/sarek-${params.tag}.img"
5352
$RunManta.container = "${params.containerPath}/sarek-${params.tag}.img"
54-
$RunMultiQC.container = "${params.containerPath}/qctools-${params.tag}.img"
55-
$RunMutect1.container = "${params.containerPath}/mutect1-${params.tag}.img"
56-
$RunMutect2.container = "${params.containerPath}/gatk-${params.tag}.img"
53+
$RunMultiQC.container = "${params.containerPath}/sarek-${params.tag}.img"
54+
$RunMutect2.container = "${params.containerPath}/sarek-${params.tag}.img"
5755
$RunSamtoolsStats.container = "${params.containerPath}/sarek-${params.tag}.img"
5856
$RunSingleManta.container = "${params.containerPath}/sarek-${params.tag}.img"
5957
$RunSingleStrelka.container = "${params.containerPath}/sarek-${params.tag}.img"
6058
$RunSnpeff.container = {params.genome == 'GRCh38' ? "${params.containerPath}/snpeffgrch38-${params.tag}.img" : "${params.containerPath}/snpeffgrch37-${params.tag}.img"}
6159
$RunStrelka.container = "${params.containerPath}/sarek-${params.tag}.img"
6260
$RunStrelkaBP.container = "${params.containerPath}/sarek-${params.tag}.img"
63-
$RunVcftools.container = "${params.containerPath}/qctools-${params.tag}.img"
61+
$RunVcftools.container = "${params.containerPath}/sarek-${params.tag}.img"
6462
$RunVEP.container = {params.genome == 'GRCh38' ? "${params.containerPath}/vepgrch38-${params.tag}.img" : "${params.containerPath}/vepgrch37-${params.tag}.img"}
6563
}

configuration/uppmax-localhost.config

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ process {
4343
$BuildBWAindexes {
4444
memory = {params.totalMemory} // TODO This is likely too high
4545
}
46-
$BuildPicardIndex {
46+
$BuildReferenceIndex {
4747
memory = {params.totalMemory} // TODO This is likely too high
4848
}
4949
$BuildSAMToolsIndex {
@@ -70,7 +70,9 @@ process {
7070
memory = {params.totalMemory}
7171
}
7272
$MarkDuplicates {
73-
memory = {params.singleCPUMem * 2 * task.attempt}
73+
// Actually the -Xmx value should be kept lower
74+
cpus = 16
75+
memory = {2 * params.singleCPUMem}
7476
}
7577
$MergeBams {
7678
cpus = 16
@@ -117,10 +119,6 @@ process {
117119
}
118120
$RunMultiQC {
119121
}
120-
$RunMutect1 {
121-
cpus = 1
122-
memory = {params.singleCPUMem * task.attempt}
123-
}
124122
$RunMutect2 {
125123
cpus = 1
126124
memory = {params.singleCPUMem * task.attempt}

0 commit comments

Comments
 (0)