diff --git a/CHANGELOG.md b/CHANGELOG.md index be1821cb50..1c4963826a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,18 +10,22 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### `Added` - [#671](https://github.com/SciLifeLab/Sarek/pull/671) - New `publishDirMode` param and docs -- [#673](https://github.com/SciLifeLab/Sarek/pull/673) - Profiles for BinAC and CFC clusters in Tübingen +- [#673](https://github.com/SciLifeLab/Sarek/pull/673), [#675](https://github.com/SciLifeLab/Sarek/pull/675), [#676](https://github.com/SciLifeLab/Sarek/pull/676) - Profiles for BinAC and CFC clusters in Tübingen +- [#679](https://github.com/SciLifeLab/Sarek/pull/679) - Add container for `CreateIntervalBeds` ### `Changed` - [#678](https://github.com/SciLifeLab/Sarek/pull/678) - Changing VEP to v92 and adjusting CPUs for VEP - [#663](https://github.com/SciLifeLab/Sarek/pull/663) - Update `do_release.sh` script - [#671](https://github.com/SciLifeLab/Sarek/pull/671) - publishDir modes are now params +- [#677](https://github.com/SciLifeLab/Sarek/pull/677) - Update docs +- [#679](https://github.com/SciLifeLab/Sarek/pull/679) - Update old awsbatch configuration ### `Fixed` - [#665](https://github.com/SciLifeLab/Sarek/pull/665) - Input bam file now has always the same name (whether it is from a single fastq pair or multiple) in the MarkDuplicates process, so metrics too - [#672](https://github.com/SciLifeLab/Sarek/pull/672) - process `PullSingularityContainers` from `buildContainers.nf` now expect a file with the correct `.simg` extension for singularity images, and no longer the `.img` one. +- [#679](https://github.com/SciLifeLab/Sarek/pull/679) - Add publishDirMode for `germlineVC.nf` ## [2.2.1] - 2018-10-04 diff --git a/Sarek-data b/Sarek-data index c2da0d2a8a..d1f1848688 160000 --- a/Sarek-data +++ b/Sarek-data @@ -1 +1 @@ -Subproject commit c2da0d2a8a1c1a8e9b9b0930b84e34073ea43d03 +Subproject commit d1f1848688d2f0a4f8c792373c3002ab046063c0 diff --git a/annotate.nf b/annotate.nf index 7ae5fb828a..1599239767 100644 --- a/annotate.nf +++ b/annotate.nf @@ -215,7 +215,7 @@ process RunVEP { script: finalannotator = annotator == "snpeff" ? 'merge' : 'vep' genome = params.genome == 'smallGRCh37' ? 'GRCh37' : params.genome - cache_version = params.genome == 'GRCh38' ? 92 : 91 + cache_version = params.genome == 'GRCh38' || params.genome == 'iGRCh38' ? 92 : 91 """ /opt/vep/src/ensembl-vep/vep --dir /opt/vep/.vep/ \ -i ${vcf} \ diff --git a/conf/aws-batch.config b/conf/aws-batch.config index 4c4c837610..ec8a9e9139 100644 --- a/conf/aws-batch.config +++ b/conf/aws-batch.config @@ -8,7 +8,8 @@ */ params { - genome_base = params.genome == 'GRCh37' ? "s3://caw-references/grch37" : params.genome == 'GRCh38' ? "s3://caw-references/grch38" : "s3://caw-references/smallgrch37" + genome_base = params.genome == 'GRCh37' ? "s3://sarek-references/Homo_sapiens/GATK/GRCh37" : params.genome == 'iGRCh38' ? "s3://sarek-references/Homo_sapiens/GATK/GRCh38" : "s3://sarek-references/small" + publishDirMode = 'copy' } executor.name = 'awsbatch' @@ -16,7 +17,7 @@ executor.awscli = '/home/ec2-user/miniconda/bin/aws' process { executor = 'awsbatch' - queue = 'caw-job-queue' + queue = 'Sarek-queue' errorStrategy = {task.exitStatus == 143 ? 'retry' : 'terminate'} maxErrors = '-1' diff --git a/conf/containers.config b/conf/containers.config index 6e0ab0a1ad..ad2becd9e5 100644 --- a/conf/containers.config +++ b/conf/containers.config @@ -26,6 +26,9 @@ process { withName:ConcatVCF { container = "${params.repository}/sarek:${params.tag}" } + withName:CreateIntervalBeds { + container = "${params.repository}/sarek:${params.tag}" + } withName:CreateRecalibrationTable { container = "${params.repository}/sarek:${params.tag}" } diff --git a/conf/genomes.config b/conf/genomes.config index 53ceecf9cf..b736bc9e4f 100644 --- a/conf/genomes.config +++ b/conf/genomes.config @@ -15,8 +15,6 @@ params { genomes { 'GRCh37' { acLoci = "${params.genome_base}/1000G_phase3_20130502_SNP_maf0.3.loci" - cosmic = "${params.genome_base}/GRCh37_Cosmic_v83.vcf" - cosmicIndex = "${cosmic}.idx" dbsnp = "${params.genome_base}/dbsnp_138.b37.vcf" dbsnpIndex = "${dbsnp}.idx" genomeFile = "${params.genome_base}/human_g1k_v37_decoy.fasta" @@ -30,8 +28,6 @@ params { } 'GRCh38' { acLoci = "${params.genome_base}/1000G_phase3_GRCh38_maf0.3.loci" - cosmic = "${params.genome_base}/COSMICv80.vcf" - cosmicIndex = "${cosmic}.idx" dbsnp = "${params.genome_base}/dbsnp_146.hg38.vcf.gz" dbsnpIndex = "${dbsnp}.tbi" genomeFile = "${params.genome_base}/Homo_sapiens_assembly38.fasta" @@ -43,13 +39,24 @@ params { knownIndelsIndex = "${params.genome_base}/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz.tbi" snpeffDb = "GRCh38.86" // This a nasty-looking list of allele-frequencies files. Add/remove files to match to your sets - //AF_files = "${params.genome_base}/{00-All.dbsnp_151.hg38.CAF.TOPMED.alternate.allele.freq,hapmap_3.3_grch38_pop_stratified_af.HMAF,SweGen_hg38_stratified.SWAF}.vcf" - //AF_indexes = "${params.genome_base}/{00-All.dbsnp_151.hg38.CAF.TOPMED.alternate.allele.freq,hapmap_3.3_grch38_pop_stratified_af.HMAF,SweGen_hg38_stratified.SWAF}.vcf.idx" + //AF_files = "${params.genome_base}/{00-All.dbsnp_151.hg38.CAF.TOPMED.alternate.allele.freq,hapmap_3.3_grch38_pop_stratified_af.HMAF,SweGen_hg38_stratified.SWAF}.vcf" + //AF_indexes = "${params.genome_base}/{00-All.dbsnp_151.hg38.CAF.TOPMED.alternate.allele.freq,hapmap_3.3_grch38_pop_stratified_af.HMAF,SweGen_hg38_stratified.SWAF}.vcf.idx" + } + 'iGRCh38' { + acLoci = "${params.genome_base}/Annotation/ASCAT/1000G_phase3_GRCh38_maf0.3.loci" + dbsnp = "${params.genome_base}/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz" + dbsnpIndex = "${params.genome_base}/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz.tbi" + genomeFile = "${params.genome_base}/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta" + genomeDict = "${params.genome_base}/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.dict" + genomeIndex = "${params.genome_base}/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta.fai" + bwaIndex = "${params.genome_base}/Sequence/BWAIndex/Homo_sapiens_assembly38.fasta.64.{alt,amb,ann,bwt,pac,sa}" + intervals = "${params.genome_base}/Annotation/intervals/wgs_calling_regions.hg38.bed" + knownIndels = "${params.genome_base}/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,Homo_sapiens_assembly38.known_indels}.vcf.gz" + knownIndelsIndex = "${params.genome_base}/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,Homo_sapiens_assembly38.known_indels}.vcf.gz.tbi" + snpeffDb = "GRCh38.86" } 'smallGRCh37' { acLoci = "${params.genome_base}/1000G_phase3_20130502_SNP_maf0.3.small.loci" - cosmic = "${params.genome_base}/b37_cosmic_v74.noCHR.sort.4.1.small.vcf" - cosmicIndex = "${cosmic}.idx" dbsnp = "${params.genome_base}/dbsnp_138.b37.small.vcf" dbsnpIndex = "${dbsnp}.idx" genomeFile = "${params.genome_base}/human_g1k_v37_decoy.small.fasta" diff --git a/conf/singularity-path.config b/conf/singularity-path.config index 448e7d1432..d41cf45059 100644 --- a/conf/singularity-path.config +++ b/conf/singularity-path.config @@ -31,6 +31,9 @@ process { withName:ConcatVCF { container = "${params.containerPath}/sarek-${params.tag}.simg" } + withName:CreateIntervalBeds { + container = "${params.containerPath}/sarek-${params.tag}.simg" + } withName:CreateRecalibrationTable { container = "${params.containerPath}/sarek-${params.tag}.simg" } diff --git a/germlineVC.nf b/germlineVC.nf index 1cc9f39ddc..7b53d1006c 100644 --- a/germlineVC.nf +++ b/germlineVC.nf @@ -104,7 +104,7 @@ if (params.verbose) recalibratedBam = recalibratedBam.view { process RunSamtoolsStats { tag {idPatient + "-" + idSample} - publishDir directoryMap.samtoolsStats, mode: 'link' + publishDir directoryMap.samtoolsStats, mode: params.publishDirMode input: set idPatient, status, idSample, file(bam), file(bai) from bamForSamToolsStats @@ -125,7 +125,7 @@ if (params.verbose) samtoolsStatsReport = samtoolsStatsReport.view { process RunBamQC { tag {idPatient + "-" + idSample} - publishDir directoryMap.bamQC, mode: 'link' + publishDir directoryMap.bamQC, mode: params.publishDirMode input: set idPatient, status, idSample, file(bam), file(bai) from bamForBamQC @@ -356,7 +356,7 @@ if (params.verbose) vcfsToMerge = vcfsToMerge.view { process ConcatVCF { tag {variantCaller + "-" + idSampleNormal} - publishDir "${directoryMap."$variantCaller"}", mode: 'link' + publishDir "${directoryMap."$variantCaller"}", mode: params.publishDirMode input: set variantCaller, idPatient, idSampleNormal, idSampleTumor, file(vcFiles) from vcfsToMerge @@ -394,7 +394,7 @@ if (params.verbose) vcfConcatenated = vcfConcatenated.view { process RunSingleStrelka { tag {idSample} - publishDir directoryMap.strelka, mode: 'link' + publishDir directoryMap.strelka, mode: params.publishDirMode input: set idPatient, status, idSample, file(bam), file(bai) from bamsForSingleStrelka @@ -447,7 +447,7 @@ if (params.verbose) singleStrelkaOutput = singleStrelkaOutput.view { process RunSingleManta { tag {idSample + " - Single Diploid"} - publishDir directoryMap.manta, mode: 'link' + publishDir directoryMap.manta, mode: params.publishDirMode input: set idPatient, status, idSample, file(bam), file(bai) from bamsForSingleManta @@ -511,7 +511,7 @@ vcfForQC = Channel.empty().mix( process RunBcftoolsStats { tag {vcf} - publishDir directoryMap.bcftoolsStats, mode: 'link' + publishDir directoryMap.bcftoolsStats, mode: params.publishDirMode input: set variantCaller, file(vcf) from vcfForBCFtools @@ -534,7 +534,7 @@ bcfReport.close() process RunVcftools { tag {vcf} - publishDir directoryMap.vcftools, mode: 'link' + publishDir directoryMap.vcftools, mode: params.publishDirMode input: set variantCaller, file(vcf) from vcfForVCFtools diff --git a/somaticVC.nf b/somaticVC.nf index 89a814da2c..9a99aebebd 100644 --- a/somaticVC.nf +++ b/somaticVC.nf @@ -279,14 +279,12 @@ process RunMutect2 { input: set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor), file(intervalBed) from bamsFMT2 - set file(genomeFile), file(genomeIndex), file(genomeDict), file(dbsnp), file(dbsnpIndex), file(cosmic), file(cosmicIndex) from Channel.value([ + set file(genomeFile), file(genomeIndex), file(genomeDict), file(dbsnp), file(dbsnpIndex) from Channel.value([ referenceMap.genomeFile, referenceMap.genomeIndex, referenceMap.genomeDict, referenceMap.dbsnp, - referenceMap.dbsnpIndex, - referenceMap.cosmic, - referenceMap.cosmicIndex + referenceMap.dbsnpIndex ]) output: @@ -832,9 +830,6 @@ def defineReferenceMap() { 'acLoci' : checkParamReturnFile("acLoci"), 'dbsnp' : checkParamReturnFile("dbsnp"), 'dbsnpIndex' : checkParamReturnFile("dbsnpIndex"), - // cosmic VCF with VCF4.1 header - 'cosmic' : checkParamReturnFile("cosmic"), - 'cosmicIndex' : checkParamReturnFile("cosmicIndex"), // genome reference dictionary 'genomeDict' : checkParamReturnFile("genomeDict"), // FASTA genome reference @@ -923,8 +918,6 @@ def minimalInformationMessage() { log.info " Tag : " + params.tag log.info "Reference files used:" log.info " acLoci :\n\t" + referenceMap.acLoci - log.info " cosmic :\n\t" + referenceMap.cosmic - log.info "\t" + referenceMap.cosmicIndex log.info " dbsnp :\n\t" + referenceMap.dbsnp log.info "\t" + referenceMap.dbsnpIndex log.info " genome :\n\t" + referenceMap.genomeFile