diff --git a/annotate.nf b/annotate.nf
index 9414a29470..84a5cc9e92 100644
--- a/annotate.nf
+++ b/annotate.nf
@@ -57,7 +57,7 @@ tools = params.tools ? params.tools.split(',').collect{it.trim().toLowerCase()}
annotateTools = params.annotateTools ? params.annotateTools.split(',').collect{it.trim().toLowerCase()} : []
annotateVCF = params.annotateVCF ? params.annotateVCF.split(',').collect{it.trim()} : []
-directoryMap = defineDirectoryMap()
+directoryMap = SarekUtils.defineDirectoryMap(params.outDir)
toolList = defineToolList()
if (!SarekUtils.checkParameterList(tools,toolList)) exit 1, 'Unknown tool(s), see --help for more information'
@@ -102,7 +102,7 @@ if (annotateVCF == []) {
vcfNotToAnnotate.close()
-(vcfForBCFtools, vcfForSnpeff, vcfForVep) = vcfToAnnotate.into(3)
+(vcfForBCFtools, vcfForVCFtools, vcfForSnpeff, vcfForVep) = vcfToAnnotate.into(4)
process RunBcftoolsStats {
tag {vcf}
@@ -117,10 +117,7 @@ process RunBcftoolsStats {
when: !params.noReports
- script:
- """
- bcftools stats ${vcf} > ${vcf.baseName}.bcf.tools.stats.out
- """
+ script: QC.bcftools(vcf)
}
if (params.verbose) bcfReport = bcfReport.view {
@@ -128,6 +125,27 @@ if (params.verbose) bcfReport = bcfReport.view {
File : [${it.fileName}]"
}
+// Run the VCFtools QC commands built by QC.vcftools(vcf) on each VCF taken
+// from vcfForVCFtools. Files matching "${vcf.baseName}.*" are emitted into
+// vcfReport and published to directoryMap.vcftools.
+// Skipped when params.noReports is set.
+process RunVcftools {
+ tag {vcf}
+
+ publishDir directoryMap.vcftools, mode: 'link'
+
+ input:
+ set variantCaller, file(vcf) from vcfForVCFtools
+
+ output:
+ file ("${vcf.baseName}.*") into vcfReport
+
+ when: !params.noReports
+
+ script: QC.vcftools(vcf)
+}
+
+// In verbose mode, print the file name of every item emitted on vcfReport.
+if (params.verbose) vcfReport = vcfReport.view {
+ "VCFTools stats report:\n\
+ File : [${it.fileName}]"
+}
+
process RunSnpeff {
tag {vcf}
@@ -208,6 +226,34 @@ if (params.verbose) vepReport = vepReport.view {
Files : ${it.fileName}"
}
+// Record the BCFtools version using the script from QC.getVersionBCFtools()
+// and publish the resulting v_*.txt file to directoryMap.version.
+// Skipped when params.noReports is set.
+process GetVersionBCFtools {
+ publishDir directoryMap.version, mode: 'link'
+ output: file("v_*.txt")
+ when: !params.noReports
+ script: QC.getVersionBCFtools()
+}
+
+// Record the SnpEff version using the script from QC.getVersionSnpEFF()
+// and publish the resulting v_*.txt file to directoryMap.version.
+// Runs only when 'snpeff' or 'merge' is listed in tools.
+process GetVersionSnpEFF {
+ publishDir directoryMap.version, mode: 'link'
+ output: file("v_*.txt")
+ when: 'snpeff' in tools || 'merge' in tools
+ script: QC.getVersionSnpEFF()
+}
+
+// Record the VCFtools version using the script from QC.getVersionVCFtools()
+// and publish the resulting v_*.txt file to directoryMap.version.
+// Skipped when params.noReports is set.
+process GetVersionVCFtools {
+ publishDir directoryMap.version, mode: 'link'
+ output: file("v_*.txt")
+ when: !params.noReports
+ script: QC.getVersionVCFtools()
+}
+
+// Record the VEP version using the script from QC.getVersionVEP()
+// and publish the resulting v_*.txt file to directoryMap.version.
+// Runs only when 'vep' or 'merge' is listed in tools.
+process GetVersionVEP {
+ publishDir directoryMap.version, mode: 'link'
+ output: file("v_*.txt")
+ when: 'vep' in tools || 'merge' in tools
+ script: QC.getVersionVEP()
+}
+
/*
================================================================================
= F U N C T I O N S =
@@ -219,26 +265,11 @@ def checkUppmaxProject() {
return !(workflow.profile == 'slurm' && !params.project)
}
-def defineDirectoryMap() {
- return [
- 'haplotypecaller' : "${params.outDir}/VariantCalling/HaplotypeCaller",
- 'manta' : "${params.outDir}/VariantCalling/Manta",
- 'mutect1' : "${params.outDir}/VariantCalling/MuTect1",
- 'mutect2' : "${params.outDir}/VariantCalling/MuTect2",
- 'strelka' : "${params.outDir}/VariantCalling/Strelka",
- 'strelkabp' : "${params.outDir}/VariantCalling/StrelkaBP",
- 'bcftoolsStats' : "${params.outDir}/Reports/BCFToolsStats",
- 'snpeffReports' : "${params.outDir}/Reports/SnpEff",
- 'snpeff' : "${params.outDir}/Annotation/SnpEff",
- 'vep' : "${params.outDir}/Annotation/VEP"
- ]
-}
-
+// Return the tool names that the `tools` list is validated against.
def defineToolList() {
return [
+ 'merge',
'snpeff',
- 'vep',
- 'merge'
+ 'vep'
]
}
diff --git a/bin/scrape_tool_versions.py b/bin/scrape_tool_versions.py
new file mode 100755
index 0000000000..fcc61c4c7f
--- /dev/null
+++ b/bin/scrape_tool_versions.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+"""Collect tool versions from v_*.txt files and print them as YAML.
+
+Each known tool has a version file (expected in the current working
+directory) and a regular expression whose first group captures the version.
+Tools whose file is missing, or whose regex does not match, are left out.
+The result is printed to stdout as a YAML section with ``plot_type: 'html'``.
+"""
+from __future__ import print_function
+from collections import OrderedDict
+import re
+
+# Tool name -> [version file, regex whose first group is the version string].
+regexes = {
+    'AlleleCount': ['v_allelecount.txt', r"(\S+)"],
+    'ASCAT': ['v_ascat.txt', r"(\d\.\d+)"],
+    'bcftools': ['v_bcftools.txt', r"bcftools (\S+)"],
+    'BWA': ['v_bwa.txt', r"Version: (\S+)"],
+    'FastQC': ['v_fastqc.txt', r"FastQC v(\S+)"],
+    'GATK': ['v_gatk.txt', r"GATK version(\S+)"],
+    'htslib': ['v_samtools.txt', r"htslib (\S+)"],
+    'Manta': ['v_manta.txt', r"([0-9.]+)"],
+    'MultiQC': ['v_multiqc.txt', r"multiqc, version (\S+)"],
+    'Nextflow': ['v_nextflow.txt', r"(\S+)"],
+    'FreeBayes': ['v_freebayes.txt', r"version: v(\d\.\d\.\d+)"],
+    'Picard': ['v_picard.txt', r"Picard version:(\d\.\d\.\d+)"],
+    'Qualimap': ['v_qualimap.txt', r"QualiMap v.(\S+)"],
+    'R': ['v_r.txt', r"R version (\S+)"],
+    'samtools': ['v_samtools.txt', r"samtools (\S+)"],
+    'Sarek': ['v_sarek.txt', r"(\S+)"],
+    'SnpEff': ['v_snpeff.txt', r"version SnpEff (\S+)"],
+    'Strelka': ['v_strelka.txt', r"([0-9.]+)"],
+    'vcftools': ['v_vcftools.txt', r"([0-9.]+)"],
+    'VEP': ['v_vep.txt', r"ensembl-vep : (\S+)"],
+}
+
+# Order in which the tools are listed in the report.
+tool_order = [
+    'Sarek', 'Nextflow', 'BWA', 'samtools', 'htslib', 'GATK', 'Picard',
+    'Manta', 'Strelka', 'FreeBayes', 'AlleleCount', 'R', 'ASCAT', 'SnpEff',
+    'VEP', 'FastQC', 'Qualimap', 'bcftools', 'vcftools', 'MultiQC',
+]
+
+# Seed every tool with a placeholder so the final output follows tool_order.
+results = OrderedDict((tool, 'N/A') for tool in tool_order)
+
+# Search each file using its regex
+for k, v in regexes.items():
+    try:
+        with open(v[0]) as x:
+            versions = x.read()
+    except IOError:
+        # An unreadable or missing file is treated as "tool not run".
+        # Nothing is printed: stdout is the YAML document itself, so a
+        # diagnostic line here would end up inside the report file.
+        continue
+    match = re.search(v[1], versions)
+    if match:
+        results[k] = "v {}".format(match.group(1))
+
+# Remove the tools for which no version was found
+for k in tool_order:
+    if results[k] == 'N/A':
+        del results[k]
+
+# Dump to YAML
+# NOTE(review): the HTML tags below were missing from the reviewed copy and
+# have been reconstructed as a definition list - confirm against the original.
+print('''
+id: 'Sarek'
+order: -1000
+section_href: 'https://github.com/SciLifeLab/Sarek'
+plot_type: 'html'
+description: 'tool versions are collected at run time from output.'
+data: |
+    <dl class="dl-horizontal">
+''')
+for k, v in results.items():
+    print("        <dt>{}</dt><dd>{}</dd>".format(k, v))
+print("    </dl>")
diff --git a/buildReferences.nf b/buildReferences.nf
index 75e6b916d3..33c8863082 100644
--- a/buildReferences.nf
+++ b/buildReferences.nf
@@ -98,27 +98,24 @@ if (params.verbose) ch_decompressedFiles = ch_decompressedFiles.view {
}
ch_fastaFile = Channel.create()
-ch_otherFiles = Channel.create()
-ch_vcfFiles = Channel.create()
+// NOTE(review): the three ch_fastaFor* variables are reassigned by the
+// into(4) call further down, so these create() calls look redundant - confirm.
+ch_fastaForBWA = Channel.create()
+ch_fastaForPicard = Channel.create()
+ch_fastaForSAMTools = Channel.create()
+ch_otherFile = Channel.create()
+ch_vcfFile = Channel.create()
+// Route each decompressed file by pattern match on its name:
+// ".fasta" -> ch_fastaFile, ".vcf" -> ch_vcfFile, anything else -> ch_otherFile.
ch_decompressedFiles
- .choice(ch_fastaFile, ch_vcfFiles, ch_otherFiles) {
+ .choice(ch_fastaFile, ch_vcfFile, ch_otherFile) {
it =~ ".fasta" ? 0 :
it =~ ".vcf" ? 1 : 2}
-(ch_fastaFile, ch_fastaFileToKeep) = ch_fastaFile.into(2)
-(ch_vcfFiles, ch_vcfFilesToKeep) = ch_vcfFiles.into(2)
+// Split the FASTA channel into four copies and the VCF channel into two;
+// the *ToKeep copies are mixed below and stored in params.outDir.
+(ch_fastaForBWA, ch_fastaForPicard, ch_fastaForSAMTools, ch_fastaFileToKeep) = ch_fastaFile.into(4)
+(ch_vcfFile, ch_vcfFileToKeep) = ch_vcfFile.into(2)
ch_notCompressedfiles
- .mix(ch_otherFiles, ch_fastaFileToKeep, ch_vcfFilesToKeep)
+ .mix(ch_fastaFileToKeep, ch_vcfFileToKeep, ch_otherFile)
.collectFile(storeDir: params.outDir)
-ch_fastaForBWA = Channel.create()
-ch_fastaForPicard = Channel.create()
-ch_fastaForSAMTools = Channel.create()
-
-ch_fastaFile.into(ch_fastaForBWA,ch_fastaForPicard,ch_fastaForSAMTools)
-
process BuildBWAindexes {
tag {f_reference}
@@ -193,7 +190,7 @@ process BuildVCFIndex {
publishDir params.outDir, mode: 'link'
input:
- file(f_reference) from ch_vcfFiles
+ file(f_reference) from ch_vcfFile
output:
file("${f_reference}.idx") into ch_vcfIndex
diff --git a/configuration/containers.config b/configuration/containers.config
index ea772f22d5..a0fcd82ffe 100644
--- a/configuration/containers.config
+++ b/configuration/containers.config
@@ -14,6 +14,21 @@ process {
$BuildVCFIndex.container = "${params.repository}/igvtools:${params.tag}"
$ConcatVCF.container = "${params.repository}/sarek:${params.tag}"
$CreateRecalibrationTable.container = "${params.repository}/gatk:${params.tag}"
+ // Containers for the GetVersion* processes. Each selector must match the
+ // process name exactly (case-sensitive), e.g. GetVersionSnpEFF in annotate.nf.
+ $GetVersionAll.container = "${params.repository}/qctools:${params.tag}"
+ $GetVersionAlleleCount.container = "${params.repository}/runallelecount:${params.tag}"
+ $GetVersionASCAT.container = "${params.repository}/r-base:${params.tag}"
+ $GetVersionBamQC.container = "${params.repository}/qctools:${params.tag}"
+ $GetVersionBCFtools.container = "${params.repository}/sarek:${params.tag}"
+ $GetVersionBWAsamtools.container = "${params.repository}/sarek:${params.tag}"
+ $GetVersionFastQC.container = "${params.repository}/qctools:${params.tag}"
+ $GetVersionFreeBayes.container = "${params.repository}/freebayes:${params.tag}"
+ $GetVersionGATK.container = "${params.repository}/gatk:${params.tag}"
+ $GetVersionManta.container = "${params.repository}/sarek:${params.tag}"
+ $GetVersionPicard.container = "${params.repository}/picard:${params.tag}"
+ $GetVersionSnpEFF.container = {params.genome == 'GRCh38' ? "${params.repository}/snpeffgrch38:${params.tag}" : "${params.repository}/snpeffgrch37:${params.tag}"}
+ $GetVersionStrelka.container = "${params.repository}/sarek:${params.tag}"
+ $GetVersionVCFtools.container = "${params.repository}/qctools:${params.tag}"
+ $GetVersionVEP.container = {params.genome == 'GRCh38' ? "${params.repository}/vepgrch38:${params.tag}" : "${params.repository}/vepgrch37:${params.tag}"}
$IndelRealigner.container = "${params.repository}/gatk:${params.tag}"
$MapReads.container = "${params.repository}/sarek:${params.tag}"
$MarkDuplicates.container = "${params.repository}/picard:${params.tag}"
diff --git a/configuration/singularity-path.config b/configuration/singularity-path.config
index 2257afd2a3..6874c61984 100644
--- a/configuration/singularity-path.config
+++ b/configuration/singularity-path.config
@@ -19,7 +19,21 @@ process {
$BuildVCFIndex.container = "${params.containerPath}/igvtools-${params.tag}.img"
$ConcatVCF.container = "${params.containerPath}/sarek-${params.tag}.img"
$CreateRecalibrationTable.container = "${params.containerPath}/gatk-${params.tag}.img"
- $GenerateMultiQCconfig.container = "${params.containerPath}/qctools-${params.tag}.img"
+ // Container images for the GetVersion* processes. Each selector must match
+ // the process name exactly (case-sensitive), e.g. GetVersionSnpEFF in annotate.nf.
+ $GetVersionAll.container = "${params.containerPath}/qctools-${params.tag}.img"
+ $GetVersionAlleleCount.container = "${params.containerPath}/runallelecount-${params.tag}.img"
+ $GetVersionASCAT.container = "${params.containerPath}/r-base-${params.tag}.img"
+ $GetVersionBamQC.container = "${params.containerPath}/qctools-${params.tag}.img"
+ $GetVersionBCFtools.container = "${params.containerPath}/sarek-${params.tag}.img"
+ $GetVersionBWAsamtools.container = "${params.containerPath}/sarek-${params.tag}.img"
+ $GetVersionFastQC.container = "${params.containerPath}/qctools-${params.tag}.img"
+ $GetVersionFreeBayes.container = "${params.containerPath}/freebayes-${params.tag}.img"
+ $GetVersionGATK.container = "${params.containerPath}/gatk-${params.tag}.img"
+ $GetVersionManta.container = "${params.containerPath}/sarek-${params.tag}.img"
+ $GetVersionPicard.container = "${params.containerPath}/picard-${params.tag}.img"
+ $GetVersionSnpEFF.container = {params.genome == 'GRCh38' ? "${params.containerPath}/snpeffgrch38-${params.tag}.img" : "${params.containerPath}/snpeffgrch37-${params.tag}.img"}
+ $GetVersionStrelka.container = "${params.containerPath}/sarek-${params.tag}.img"
+ $GetVersionVCFtools.container = "${params.containerPath}/qctools-${params.tag}.img"
+ $GetVersionVEP.container = {params.genome == 'GRCh38' ? "${params.containerPath}/vepgrch38-${params.tag}.img" : "${params.containerPath}/vepgrch37-${params.tag}.img"}
$IndelRealigner.container = "${params.containerPath}/gatk-${params.tag}.img"
$MapReads.container = "${params.containerPath}/sarek-${params.tag}.img"
$MarkDuplicates.container = "${params.containerPath}/picard-${params.tag}.img"
diff --git a/germlineVC.nf b/germlineVC.nf
index afb1611d14..f6c87d9a25 100644
--- a/germlineVC.nf
+++ b/germlineVC.nf
@@ -61,7 +61,7 @@ if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! Use --project ${bam}.samtools.stats.out
- """
+ script: QC.samtoolsStats(bam)
}
if (params.verbose) samtoolsStatsReport = samtoolsStatsReport.view {
@@ -157,14 +154,7 @@ process RunBamQC {
when: !params.noReports && !params.noBAMQC
- script:
- """
- qualimap --java-mem-size=${task.memory.toGiga()}G \
- bamqc \
- -bam ${bam} \
- -outdir ${idSample} \
- -outformat HTML
- """
+ script: QC.bamQC(bam,idSample,task.memory)
}
if (params.verbose) bamQCreport = bamQCreport.view {
@@ -579,10 +569,7 @@ process RunBcftoolsStats {
when: !params.noReports
- script:
- """
- bcftools stats ${vcf} > ${vcf.baseName}.bcf.tools.stats.out
- """
+ script: QC.bcftools(vcf)
}
if (params.verbose) bcfReport = bcfReport.view {
@@ -605,28 +592,7 @@ process RunVcftools {
when: !params.noReports
- script:
- """
- vcftools \
- --gzvcf ${vcf} \
- --relatedness2 \
- --out ${vcf.baseName}
-
- vcftools \
- --gzvcf ${vcf} \
- --TsTv-by-count \
- --out ${vcf.baseName}
-
- vcftools \
- --gzvcf ${vcf} \
- --TsTv-by-qual \
- --out ${vcf.baseName}
-
- vcftools \
- --gzvcf ${vcf} \
- --FILTER-summary \
- --out ${vcf.baseName}
- """
+ script: QC.vcftools(vcf)
}
if (params.verbose) vcfReport = vcfReport.view {
@@ -635,6 +601,42 @@ if (params.verbose) vcfReport = vcfReport.view {
}
vcfReport.close()
+
+// Record the GATK version using the script from QC.getVersionGATK() and
+// publish the resulting v_*.txt file to directoryMap.version.
+// Runs only when 'haplotypecaller' is in tools and params.onlyQC is not set.
+process GetVersionGATK {
+ publishDir directoryMap.version, mode: 'link'
+ output: file("v_*.txt")
+ when: 'haplotypecaller' in tools && !params.onlyQC
+ script: QC.getVersionGATK()
+}
+
+// Record the Strelka version using the script from QC.getVersionStrelka() and
+// publish the resulting v_*.txt file to directoryMap.version.
+// Runs only when 'strelka' is in tools and params.onlyQC is not set.
+process GetVersionStrelka {
+ publishDir directoryMap.version, mode: 'link'
+ output: file("v_*.txt")
+ when: 'strelka' in tools && !params.onlyQC
+ script: QC.getVersionStrelka()
+}
+
+// Record the Manta version using the script from QC.getVersionManta() and
+// publish the resulting v_*.txt file to directoryMap.version.
+// Runs only when 'manta' is in tools and params.onlyQC is not set.
+process GetVersionManta {
+ publishDir directoryMap.version, mode: 'link'
+ output: file("v_*.txt")
+ when: 'manta' in tools && !params.onlyQC
+ script: QC.getVersionManta()
+}
+
+// Record the BCFtools version using the script from QC.getVersionBCFtools()
+// and publish the resulting v_*.txt file to directoryMap.version.
+// Skipped when params.noReports is set.
+process GetVersionBCFtools {
+ publishDir directoryMap.version, mode: 'link'
+ output: file("v_*.txt")
+ when: !params.noReports
+ script: QC.getVersionBCFtools()
+}
+
+// Record the VCFtools version using the script from QC.getVersionVCFtools()
+// and publish the resulting v_*.txt file to directoryMap.version.
+// Skipped when params.noReports is set.
+process GetVersionVCFtools {
+ publishDir directoryMap.version, mode: 'link'
+ output: file("v_*.txt")
+ when: !params.noReports
+ script: QC.getVersionVCFtools()
+}
+
/*
================================================================================
= F U N C T I O N S =
@@ -690,24 +692,6 @@ def checkUppmaxProject() {
return !(workflow.profile == 'slurm' && !params.project)
}
-def defineDirectoryMap() {
- return [
- 'recalibrated' : "${params.outDir}/Preprocessing/Recalibrated",
- 'bamQC' : "${params.outDir}/Reports/bamQC",
- 'bcftoolsStats' : "${params.outDir}/Reports/BCFToolsStats",
- 'samtoolsStats' : "${params.outDir}/Reports/SamToolsStats",
- 'vcftools' : "${params.outDir}/Reports/VCFTools",
- 'ascat' : "${params.outDir}/VariantCalling/Ascat",
- 'freebayes' : "${params.outDir}/VariantCalling/FreeBayes",
- 'gvcf-hc' : "${params.outDir}/VariantCalling/HaplotypeCallerGVCF",
- 'haplotypecaller' : "${params.outDir}/VariantCalling/HaplotypeCaller",
- 'manta' : "${params.outDir}/VariantCalling/Manta",
- 'mutect1' : "${params.outDir}/VariantCalling/MuTect1",
- 'mutect2' : "${params.outDir}/VariantCalling/MuTect2",
- 'strelka' : "${params.outDir}/VariantCalling/Strelka"
- ]
-}
-
def defineReferenceMap() {
if (!(params.genome in params.genomes)) exit 1, "Genome ${params.genome} not found in configuration"
return [
diff --git a/lib/QC.groovy b/lib/QC.groovy
new file mode 100644
index 0000000000..96ef79fa6f
--- /dev/null
+++ b/lib/QC.groovy
@@ -0,0 +1,100 @@
+// Shared builders for QC and version-reporting shell scripts.
+// Every method returns the script text as a string, so the workflows can
+// use it directly in a process `script:` section.
+class QC {
+// Run Qualimap bamqc on a BAM file.
+// bam: BAM file to analyse; idSample: used as the output directory name;
+// mem: memory object whose toGiga() value is passed as the Java memory size.
+ static def bamQC(bam, idSample, mem) {
+ """
+ qualimap --java-mem-size=${mem.toGiga()}G \
+ bamqc \
+ -bam ${bam} \
+ -outdir ${idSample} \
+ -outformat HTML
+ """
+ }
+
+// Run bcftools stats on a VCF file; output goes to <vcf.baseName>.bcf.tools.stats.out
+ static def bcftools(vcf) {
+ """
+ bcftools stats ${vcf} > ${vcf.baseName}.bcf.tools.stats.out
+ """
+ }
+
+// Run samtools stats on a BAM file; output goes to <bam>.samtools.stats.out
+ static def samtoolsStats(bam) {
+ """
+ samtools stats ${bam} > ${bam}.samtools.stats.out
+ """
+ }
+
+// Run vcftools four times on a gzipped VCF file (--relatedness2,
+// --TsTv-by-count, --TsTv-by-qual, --FILTER-summary), each with the
+// output prefix <vcf.baseName>
+ static def vcftools(vcf) {
+ """
+ vcftools \
+ --gzvcf ${vcf} \
+ --relatedness2 \
+ --out ${vcf.baseName}
+
+ vcftools \
+ --gzvcf ${vcf} \
+ --TsTv-by-count \
+ --out ${vcf.baseName}
+
+ vcftools \
+ --gzvcf ${vcf} \
+ --TsTv-by-qual \
+ --out ${vcf.baseName}
+
+ vcftools \
+ --gzvcf ${vcf} \
+ --FILTER-summary \
+ --out ${vcf.baseName}
+ """
+ }
+
+// Get BCFtools version, written to v_bcftools.txt
+ static def getVersionBCFtools() {
+ """
+ bcftools version > v_bcftools.txt
+ """
+ }
+
+// Get GATK version, written to v_gatk.txt prefixed with "GATK version"
+// (requires the GATK_HOME environment variable)
+ static def getVersionGATK() {
+ """
+ echo "GATK version"\$(java -jar \$GATK_HOME/GenomeAnalysisTK.jar --version 2>&1) > v_gatk.txt
+ """
+ }
+
+// Get Manta version: the workflowVersion line of configBuildTimeInfo.py,
+// written to v_manta.txt (requires the MANTA_INSTALL_PATH environment variable)
+ static def getVersionManta() {
+ """
+ cat \$MANTA_INSTALL_PATH/lib/python/configBuildTimeInfo.py | grep workflowVersion > v_manta.txt
+ """
+ }
+
+// Get SnpEff version: the output of `snpEff.jar -h` prefixed with
+// "SNPEFF version", written to v_snpeff.txt (requires SNPEFF_HOME)
+ static def getVersionSnpEFF() {
+ """
+ echo "SNPEFF version"\$(java -jar \$SNPEFF_HOME/snpEff.jar -h 2>&1) > v_snpeff.txt
+ """
+ }
+
+// Get Strelka version: the workflowVersion line of configBuildTimeInfo.py,
+// written to v_strelka.txt (requires the STRELKA_INSTALL_PATH environment variable)
+ static def getVersionStrelka() {
+ """
+ cat \$STRELKA_INSTALL_PATH/lib/python/configBuildTimeInfo.py | grep workflowVersion > v_strelka.txt
+ """
+ }
+
+// Get VCFtools version, written to v_vcftools.txt
+ static def getVersionVCFtools() {
+ """
+ vcftools --version > v_vcftools.txt
+ """
+ }
+
+// Get VEP version: the output of `vep --help`, written to v_vep.txt
+ static def getVersionVEP() {
+ """
+ vep --help > v_vep.txt
+ """
+ }
+}
diff --git a/lib/SarekUtils.groovy b/lib/SarekUtils.groovy
index 5af73fcadc..ba59b3bf06 100644
--- a/lib/SarekUtils.groovy
+++ b/lib/SarekUtils.groovy
@@ -1,6 +1,6 @@
class MyUtils {
+ // Check if params is in this given list
static def checkParams(it) {
- // Check if params is in this given list
return it in [
'annotate-tools',
'annotate-VCF',
@@ -67,13 +67,13 @@ class MyUtils {
'version']
}
+ // Return true only if every entry of `list` passes checkParameterExistence against `realList`
static def checkParameterList(list, realList) {
- // Loop through all parameters to check their existence and spelling
return list.every{ checkParameterExistence(it, realList) }
}
+ // Check parameter existence
static def checkParameterExistence(it, list) {
- // Check parameter existence
if (!list.contains(it)) {
println("Unknown parameter: ${it}")
return false
@@ -81,8 +81,37 @@ class MyUtils {
return true
}
+ // Define map of directories: each key names an output category and maps
+ // to its path under `outDir` (Preprocessing, VariantCalling, Annotation
+ // and Reports subdirectories).
+ static def defineDirectoryMap(outDir) {
+ return [
+ 'nonRealigned' : "${outDir}/Preprocessing/NonRealigned",
+ 'nonRecalibrated' : "${outDir}/Preprocessing/NonRecalibrated",
+ 'recalibrated' : "${outDir}/Preprocessing/Recalibrated",
+ 'ascat' : "${outDir}/VariantCalling/Ascat",
+ 'freebayes' : "${outDir}/VariantCalling/FreeBayes",
+ 'gvcf-hc' : "${outDir}/VariantCalling/HaplotypeCallerGVCF",
+ 'haplotypecaller' : "${outDir}/VariantCalling/HaplotypeCaller",
+ 'manta' : "${outDir}/VariantCalling/Manta",
+ 'mutect1' : "${outDir}/VariantCalling/MuTect1",
+ 'mutect2' : "${outDir}/VariantCalling/MuTect2",
+ 'strelka' : "${outDir}/VariantCalling/Strelka",
+ 'strelkabp' : "${outDir}/VariantCalling/StrelkaBP",
+ 'snpeff' : "${outDir}/Annotation/SnpEff",
+ 'vep' : "${outDir}/Annotation/VEP",
+ 'bamQC' : "${outDir}/Reports/bamQC",
+ 'bcftoolsStats' : "${outDir}/Reports/BCFToolsStats",
+ 'fastQC' : "${outDir}/Reports/FastQC",
+ 'markDuplicatesQC' : "${outDir}/Reports/MarkDuplicates",
+ 'multiQC' : "${outDir}/Reports/MultiQC",
+ 'samtoolsStats' : "${outDir}/Reports/SamToolsStats",
+ 'snpeffReports' : "${outDir}/Reports/SnpEff",
+ 'vcftools' : "${outDir}/Reports/VCFTools",
+ 'version' : "${outDir}/Reports/ToolsVersion"
+ ]
+ }
+
+ // Compare params to list of verified params
static def isAllowedParams(params) {
- // Compare params to list of verified params
final test = true
params.each{
if (!checkParams(it.toString().split('=')[0])) {
diff --git a/main.nf b/main.nf
index 52711628ea..8fc1baaaa5 100644
--- a/main.nf
+++ b/main.nf
@@ -63,7 +63,7 @@ if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! Use --project ${bam}.samtools.stats.out
- """
+ script: QC.samtoolsStats(bam)
}
if (params.verbose) samtoolsStatsReport = samtoolsStatsReport.view {
@@ -575,14 +576,7 @@ process RunBamQC {
when: !params.noReports && !params.noBAMQC
- script:
- """
- qualimap --java-mem-size=${task.memory.toGiga()}G \
- bamqc \
- -bam ${bam} \
- -outdir ${idSample} \
- -outformat HTML
- """
+ script: QC.bamQC(bam,idSample,task.memory)
}
if (params.verbose) bamQCreport = bamQCreport.view {
@@ -590,6 +584,59 @@ if (params.verbose) bamQCreport = bamQCreport.view {
Dir : [${it.fileName}]"
}
+// Record the output of `qualimap --version` (stdout and stderr) in
+// v_qualimap.txt and publish it to directoryMap.version.
+// Skipped when params.noReports or params.noBAMQC is set.
+process GetVersionBamQC {
+ publishDir directoryMap.version, mode: 'link'
+ output: file("v_*.txt")
+ when: !params.noReports && !params.noBAMQC
+
+ script:
+ """
+ qualimap --version &> v_qualimap.txt
+ """
+}
+
+// Record the BWA banner in v_bwa.txt and the samtools version in
+// v_samtools.txt, then publish both to directoryMap.version.
+// Runs only when step is 'mapping' and params.onlyQC is not set.
+// `|| true` keeps the task going whatever exit status `bwa` returns
+// (presumably non-zero when called without arguments - confirm).
+process GetVersionBWAsamtools {
+ publishDir directoryMap.version, mode: 'link'
+ output: file("v_*.txt")
+ when: step == 'mapping' && !params.onlyQC
+
+ script:
+ """
+ bwa &> v_bwa.txt 2>&1 || true
+ samtools --version &> v_samtools.txt
+ """
+}
+
+// Record the output of `fastqc -v` in v_fastqc.txt and publish it to
+// directoryMap.version.
+// Runs only when step is 'mapping' and params.noReports is not set.
+process GetVersionFastQC {
+ publishDir directoryMap.version, mode: 'link'
+ output:
+ file("v_fastqc.txt")
+ when: step == 'mapping' && !params.noReports
+
+ script:
+ """
+ fastqc -v > v_fastqc.txt
+ """
+}
+
+// Record the GATK version using the script from QC.getVersionGATK() and
+// publish the resulting v_*.txt file to directoryMap.version.
+// Skipped when params.onlyQC is set.
+process GetVersionGATK {
+ publishDir directoryMap.version, mode: 'link'
+ output: file("v_*.txt")
+ when: !params.onlyQC
+ script: QC.getVersionGATK()
+}
+
+// Record the Picard version (output of `MarkDuplicates --version`, prefixed
+// with "Picard version:") in v_picard.txt and publish it to
+// directoryMap.version. Requires the PICARD_HOME environment variable.
+// Runs only when step is 'mapping' and params.onlyQC is not set.
+process GetVersionPicard {
+ publishDir directoryMap.version, mode: 'link'
+ output: file("v_*.txt")
+ when: step == 'mapping' && !params.onlyQC
+
+ script:
+ """
+ echo "Picard version:"\$(java -jar \$PICARD_HOME/picard.jar MarkDuplicates --version 2>&1) > v_picard.txt
+ """
+}
+
/*
================================================================================
= F U N C T I O N S =
@@ -646,19 +693,6 @@ def checkExactlyOne(list) {
return n == 1
}
-def defineDirectoryMap() {
- return [
- 'nonRealigned' : "${params.outDir}/Preprocessing/NonRealigned",
- 'nonRecalibrated' : "${params.outDir}/Preprocessing/NonRecalibrated",
- 'recalibrated' : "${params.outDir}/Preprocessing/Recalibrated",
- 'bamQC' : "${params.outDir}/Reports/bamQC",
- 'bcftoolsStats' : "${params.outDir}/Reports/BCFToolsStats",
- 'fastQC' : "${params.outDir}/Reports/FastQC",
- 'markDuplicatesQC' : "${params.outDir}/Reports/MarkDuplicates",
- 'samtoolsStats' : "${params.outDir}/Reports/SamToolsStats"
- ]
-}
-
def defineReferenceMap() {
if (!(params.genome in params.genomes)) exit 1, "Genome ${params.genome} not found in configuration"
return [
diff --git a/runMultiQC.nf b/runMultiQC.nf
index fe90ce222f..5e87625424 100644
--- a/runMultiQC.nf
+++ b/runMultiQC.nf
@@ -52,7 +52,7 @@ if (params.help) exit 0, helpMessage()
if (!SarekUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information"
if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! Use --project "
-directoryMap = defineDirectoryMap()
+directoryMap = SarekUtils.defineDirectoryMap(params.outDir)
/*
================================================================================
= P R O C E S S E S =
@@ -61,41 +61,28 @@ directoryMap = defineDirectoryMap()
startMessage()
-process GenerateMultiQCconfig {
+// Gather every file found in directoryMap.version, add the Sarek, Nextflow
+// and MultiQC versions, and run scrape_tool_versions.py to produce
+// tool_versions_mqc.yaml (published to directoryMap.multiQC).
+// Only the stdout of scrape_tool_versions.py is written to the YAML file,
+// so warnings or tracebacks on stderr cannot end up inside the document.
+// Skipped when params.noReports is set.
+process GetVersionAll {
publishDir directoryMap.multiQC, mode: 'link'
input:
+ file(versions) from Channel.fromPath("${directoryMap.version}/*").collect()
output:
- file("multiqc_config.yaml") into multiQCconfig
+ file ("tool_versions_mqc.yaml") into versionsForMultiQC
when: !params.noReports
script:
"""
- touch multiqc_config.yaml
- echo "custom_logo: ${baseDir}/doc/images/Sarek_no_Border.png" >> multiqc_config.yaml
- echo "custom_logo_url: http://opensource.scilifelab.se/projects/sarek" >> multiqc_config.yaml
- echo "custom_logo_title: 'Sarek'" >> multiqc_config.yaml
- echo "report_header_info:" >> multiqc_config.yaml
- echo "- Sarek version: ${params.version}" >> multiqc_config.yaml
- echo "- Contact Name: ${params.callName}" >> multiqc_config.yaml
- echo "- Contact E-mail: ${params.contactMail}" >> multiqc_config.yaml
- echo "- Directory: ${workflow.launchDir}" >> multiqc_config.yaml
- echo "- Genome: "${params.genome} >> multiqc_config.yaml
- echo "top_modules:" >> multiqc_config.yaml
- echo "- 'fastqc'" >> multiqc_config.yaml
- echo "- 'picard'" >> multiqc_config.yaml
- echo "- 'samtools'" >> multiqc_config.yaml
- echo "- 'qualimap'" >> multiqc_config.yaml
- echo "- 'bcftools'" >> multiqc_config.yaml
- echo "- 'vcftools'" >> multiqc_config.yaml
- echo "- 'snpeff'" >> multiqc_config.yaml
+ echo "${params.version}" &> v_sarek.txt
+ echo "${workflow.nextflow.version}" &> v_nextflow.txt
+ multiqc --version &> v_multiqc.txt
+ scrape_tool_versions.py > tool_versions_mqc.yaml
"""
}
-if (params.verbose && !params.noReports) multiQCconfig = multiQCconfig.view {
- "MultiQC config:\n\
+if (params.verbose && !params.noReports) versionsForMultiQC = versionsForMultiQC.view {
+ "MultiQC tools version:\n\
File : [${it.fileName}]"
}
@@ -108,14 +95,15 @@ reportsForMultiQC = Channel.empty()
Channel.fromPath("${directoryMap.samtoolsStats}/*"),
Channel.fromPath("${directoryMap.snpeffReports}/*"),
Channel.fromPath("${directoryMap.vcftools}/*"),
- multiQCconfig
).collect()
process RunMultiQC {
publishDir directoryMap.multiQC, mode: 'link'
input:
- file ('*') from reportsForMultiQC
+ file (multiqcConfig) from createMultiQCconfig()
+ file (reports) from reportsForMultiQC
+ file (versions) from versionsForMultiQC
output:
set file("*multiqc_report.html"), file("*multiqc_data") into multiQCReport
@@ -145,17 +133,27 @@ def checkUppmaxProject() {
return !(workflow.profile == 'slurm' && !params.project)
}
-def defineDirectoryMap() {
- return [
- 'bamQC' : "${params.outDir}/Reports/bamQC",
- 'bcftoolsStats' : "${params.outDir}/Reports/BCFToolsStats",
- 'fastQC' : "${params.outDir}/Reports/FastQC",
- 'markDuplicatesQC' : "${params.outDir}/Reports/MarkDuplicates",
- 'multiQC' : "${params.outDir}/Reports/MultiQC",
- 'samtoolsStats' : "${params.outDir}/Reports/SamToolsStats",
- 'snpeffReports' : "${params.outDir}/Reports/SnpEff",
- 'vcftools' : "${params.outDir}/Reports/VCFTools"
- ]
+// Write the MultiQC configuration (custom logo, report header info and the
+// list of top modules) to multiqc_config.yaml inside workDir, and return
+// that file.
+def createMultiQCconfig() {
+ def file = workDir.resolve('multiqc_config.yaml')
+ file.text = """
+ custom_logo: ${baseDir}/doc/images/Sarek_no_Border.png
+ custom_logo_url: http://opensource.scilifelab.se/projects/sarek
+ custom_logo_title: 'Sarek'
+ report_header_info:
+ - Contact Name: ${params.callName}
+ - Contact E-mail: ${params.contactMail}
+ - Genome: ${params.genome}
+ top_modules:
+ - 'fastqc'
+ - 'picard'
+ - 'samtools'
+ - 'qualimap'
+ - 'bcftools'
+ - 'vcftools'
+ - 'snpeff'
+ """.stripIndent()
+
+ return file
}
def grabRevision() {
diff --git a/scripts/test.sh b/scripts/test.sh
index 71f164a900..ccc595dc6f 100755
--- a/scripts/test.sh
+++ b/scripts/test.sh
@@ -55,7 +55,7 @@ function clean_repo() {
if [[ $TRAVIS == false ]] && [[ $KEEP == false ]]
then
echo "$(tput setaf 1)Cleaning directory$(tput sgr0)"
- rm -rf work .nextflow* Preprocessing Reports Annotation VariantCalling Results
+ rm -rf work .nextflow* Annotation Preprocessing Reports Results VariantCalling
fi
}
diff --git a/somaticVC.nf b/somaticVC.nf
index 20d15b7e92..a8f698876b 100644
--- a/somaticVC.nf
+++ b/somaticVC.nf
@@ -66,7 +66,7 @@ if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! Use --project ${bam}.samtools.stats.out
- """
+ script: QC.samtoolsStats(bam)
}
if (params.verbose) samtoolsStatsReport = samtoolsStatsReport.view {
@@ -162,14 +159,7 @@ process RunBamQC {
when: !params.noReports && !params.noBAMQC
- script:
- """
- qualimap --java-mem-size=${task.memory.toGiga()}G \
- bamqc \
- -bam ${bam} \
- -outdir ${idSample} \
- -outformat HTML
- """
+ script: QC.bamQC(bam,idSample,task.memory)
}
if (params.verbose) bamQCreport = bamQCreport.view {
@@ -826,10 +816,7 @@ process RunBcftoolsStats {
when: !params.noReports
- script:
- """
- bcftools stats ${vcf} > ${vcf.baseName}.bcf.tools.stats.out
- """
+ script: QC.bcftools(vcf)
}
if (params.verbose) bcfReport = bcfReport.view {
@@ -852,28 +839,7 @@ process RunVcftools {
when: !params.noReports
- script:
- """
- vcftools \
- --gzvcf ${vcf} \
- --relatedness2 \
- --out ${vcf.baseName}
-
- vcftools \
- --gzvcf ${vcf} \
- --TsTv-by-count \
- --out ${vcf.baseName}
-
- vcftools \
- --gzvcf ${vcf} \
- --TsTv-by-qual \
- --out ${vcf.baseName}
-
- vcftools \
- --gzvcf ${vcf} \
- --FILTER-summary \
- --out ${vcf.baseName}
- """
+ script: QC.vcftools(vcf)
}
if (params.verbose) vcfReport = vcfReport.view {
@@ -882,6 +848,76 @@ if (params.verbose) vcfReport = vcfReport.view {
}
vcfReport.close()
+
+// Record the GATK version using the script from QC.getVersionGATK() and
+// publish the resulting v_*.txt file to directoryMap.version.
+// Skipped when params.onlyQC is set.
+process GetVersionGATK {
+ publishDir directoryMap.version, mode: 'link'
+ output: file("v_*.txt")
+ when: !params.onlyQC
+ script: QC.getVersionGATK()
+}
+
+// Record the output of `freebayes --version` in v_freebayes.txt and publish
+// it to directoryMap.version.
+// Runs only when 'freebayes' is in tools and params.onlyQC is not set.
+process GetVersionFreeBayes {
+ publishDir directoryMap.version, mode: 'link'
+ output: file("v_*.txt")
+ when: 'freebayes' in tools && !params.onlyQC
+
+ script:
+ """
+ freebayes --version > v_freebayes.txt
+ """
+}
+
+// Record the output of `alleleCounter --version` in v_allelecount.txt and
+// publish it to directoryMap.version.
+// Runs only when 'ascat' is in tools and params.onlyQC is not set.
+process GetVersionAlleleCount {
+ publishDir directoryMap.version, mode: 'link'
+ output: file("v_*.txt")
+ when: 'ascat' in tools && !params.onlyQC
+
+ script:
+ """
+ alleleCounter --version > v_allelecount.txt
+ """
+}
+
+// Record the output of `R --version` in v_r.txt, and the lines of
+// ${baseDir}/scripts/ascat.R that contain "ASCAT version" in v_ascat.txt;
+// both files are published to directoryMap.version.
+// Runs only when 'ascat' is in tools and params.onlyQC is not set.
+process GetVersionASCAT {
+ publishDir directoryMap.version, mode: 'link'
+ output: file("v_*.txt")
+ when: 'ascat' in tools && !params.onlyQC
+
+ script:
+ """
+ R --version > v_r.txt
+ cat ${baseDir}/scripts/ascat.R | grep "ASCAT version" > v_ascat.txt
+ """
+}
+
+// Record the Strelka version using the script from QC.getVersionStrelka() and
+// publish the resulting v_*.txt file to directoryMap.version.
+// Runs only when 'strelka' is in tools and params.onlyQC is not set.
+process GetVersionStrelka {
+ publishDir directoryMap.version, mode: 'link'
+ output: file("v_*.txt")
+ when: 'strelka' in tools && !params.onlyQC
+ script: QC.getVersionStrelka()
+}
+
+// Record the Manta version using the script from QC.getVersionManta() and
+// publish the resulting v_*.txt file to directoryMap.version.
+// Runs only when 'manta' is in tools and params.onlyQC is not set.
+process GetVersionManta {
+ publishDir directoryMap.version, mode: 'link'
+ output: file("v_*.txt")
+ when: 'manta' in tools && !params.onlyQC
+ script: QC.getVersionManta()
+}
+
+// Record the BCFtools version using the script from QC.getVersionBCFtools()
+// and publish the resulting v_*.txt file to directoryMap.version.
+// Skipped when params.noReports is set.
+process GetVersionBCFtools {
+ publishDir directoryMap.version, mode: 'link'
+ output: file("v_*.txt")
+ when: !params.noReports
+ script: QC.getVersionBCFtools()
+}
+
+// Record the VCFtools version using the script from QC.getVersionVCFtools()
+// and publish the resulting v_*.txt file to directoryMap.version.
+// Skipped when params.noReports is set.
+process GetVersionVCFtools {
+ publishDir directoryMap.version, mode: 'link'
+ output: file("v_*.txt")
+ when: !params.noReports
+ script: QC.getVersionVCFtools()
+}
+
/*
================================================================================
= F U N C T I O N S =
@@ -937,23 +973,6 @@ def checkUppmaxProject() {
return !(workflow.profile == 'slurm' && !params.project)
}
-def defineDirectoryMap() {
- return [
- 'recalibrated' : "${params.outDir}/Preprocessing/Recalibrated",
- 'bamQC' : "${params.outDir}/Reports/bamQC",
- 'bcftoolsStats' : "${params.outDir}/Reports/BCFToolsStats",
- 'samtoolsStats' : "${params.outDir}/Reports/SamToolsStats",
- 'vcftools' : "${params.outDir}/Reports/VCFTools",
- 'ascat' : "${params.outDir}/VariantCalling/Ascat",
- 'freebayes' : "${params.outDir}/VariantCalling/FreeBayes",
- 'manta' : "${params.outDir}/VariantCalling/Manta",
- 'mutect1' : "${params.outDir}/VariantCalling/MuTect1",
- 'mutect2' : "${params.outDir}/VariantCalling/MuTect2",
- 'strelka' : "${params.outDir}/VariantCalling/Strelka",
- 'strelkabp' : "${params.outDir}/VariantCalling/StrelkaBP"
- ]
-}
-
def defineReferenceMap() {
if (!(params.genome in params.genomes)) exit 1, "Genome ${params.genome} not found in configuration"
return [