diff --git a/.github/workflows/nf-core_eager.yml b/.github/workflows/nf-core_eager.yml
index 71eec1006..7213d06af 100644
--- a/.github/workflows/nf-core_eager.yml
+++ b/.github/workflows/nf-core_eager.yml
@@ -86,15 +86,15 @@ jobs:
- name: SKIPPING Test checking all skip steps work i.e. input bam, skipping straight to genotyping
run: |
nextflow run ${GITHUB_WORKSPACE} "$TOWER" -name "$RUN_NAME-skipping_logic" -profile test_bam,docker --bam --singleEnd --skip_fastqc --skip_adapterremoval --skip_mapping --skip_deduplication --skip_qualimap --skip_preseq --skip_damage_calculation --run_genotyping --genotyping_tool 'freebayes'
- - name: TRIM_BAM/PMD/GENOTYPING_UG/MULTIVCFANALYZER Test running PMDTools, TrimBam, GATK UnifiedGenotyper and MultiVCFAnalyzer
- run: |
- nextflow run ${GITHUB_WORKSPACE} "$TOWER" -name "$RUN_NAME-pmd_trimbam_gatkUG_MVA" -profile test,docker --pairedEnd --dedupper 'dedup' --run_trim_bam --run_pmdtools --run_genotyping --genotyping_source 'trimmed' --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer
- - name: GENOTYPING_UG/PMD/MULTIVCFANALYZER Test running GATK UnifiedGenotyper and MultiVCFAnalyzer, additional VCFS
- run: |
- nextflow run ${GITHUB_WORKSPACE} "$TOWER" -name "$RUN_NAME-MVA_additionalvcfs" -profile test,docker --pairedEnd --dedupper 'dedup' --run_genotyping --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer --additional_vcf_files 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/testdata/Mammoth/vcf/JK2772_CATCAGTGAGTAGA_L008_R1_001.fastq.gz.tengrand.fq.combined.fq.mapped_rmdup.bam.unifiedgenotyper.vcf.gz' --write_allele_frequencies
- - name: VCF2Genome Run basic pipeline with GATK unifiedgenotyper and run VCF2Genome
- run: |
- nextflow run ${GITHUB_WORKSPACE} "$TOWER" -name "$RUN_NAME-vcf2genome" -profile test,docker --pairedEnd --dedupper 'dedup' --run_genotyping --genotyping_tool 'ug' --genotyping_source 'raw' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_vcf2genome
+ #- name: TRIM_BAM/PMD/GENOTYPING_UG/MULTIVCFANALYZER Test running PMDTools, TrimBam, GATK UnifiedGenotyper and MultiVCFAnalyzer
+ # run: |
+ # nextflow run ${GITHUB_WORKSPACE} "$TOWER" -name "$RUN_NAME-pmd_trimbam_gatkUG_MVA" -profile test,docker --pairedEnd --dedupper 'dedup' --run_trim_bam --run_pmdtools --run_genotyping --genotyping_source 'trimmed' --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer
+ #- name: GENOTYPING_UG/PMD/MULTIVCFANALYZER Test running GATK UnifiedGenotyper and MultiVCFAnalyzer, additional VCFS
+ # run: |
+ # nextflow run ${GITHUB_WORKSPACE} "$TOWER" -name "$RUN_NAME-MVA_additionalvcfs" -profile test,docker --pairedEnd --dedupper 'dedup' --run_genotyping --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer --additional_vcf_files 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/testdata/Mammoth/vcf/JK2772_CATCAGTGAGTAGA_L008_R1_001.fastq.gz.tengrand.fq.combined.fq.mapped_rmdup.bam.unifiedgenotyper.vcf.gz' --write_allele_frequencies
+ #- name: VCF2Genome Run basic pipeline with GATK unifiedgenotyper and run VCF2Genome
+ # run: |
+ # nextflow run ${GITHUB_WORKSPACE} "$TOWER" -name "$RUN_NAME-vcf2genome" -profile test,docker --pairedEnd --dedupper 'dedup' --run_genotyping --genotyping_tool 'ug' --genotyping_source 'raw' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_vcf2genome
- name: BAM_INPUT Run the basic pipeline with the bam input profile, skip AdapterRemoval as no convertBam
run: |
nextflow run ${GITHUB_WORKSPACE} "$TOWER" -name "$RUN_NAME-baminput_noConvertBam" -profile test_bam,docker --bam --skip_adapterremoval --run_convertbam
diff --git a/.travis.yml b/.travis.yml
index b12effff7..eef48337b 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -78,11 +78,11 @@ script:
# SKIPPING: Test checking all skip steps work i.e. input bam, skipping straight to genotyping
- nextflow run ${TRAVIS_BUILD_DIR} -name "$RUN_NAME-skipping_logic" -profile test_bam,docker --bam --singleEnd --skip_fastqc --skip_adapterremoval --skip_mapping --skip_deduplication --skip_qualimap --skip_preseq --skip_damage_calculation --run_genotyping --genotyping_tool 'freebayes'
# TRIM_BAM/PMD/GENOTYPING_UG/MULTIVCFANALYZER: Test running PMDTools, TrimBam, GATK UnifiedGenotyper and MultiVCFAnalyzer
- - nextflow run ${TRAVIS_BUILD_DIR} -name "$RUN_NAME-pmd_trimbam_unifiedgenotyper_multivcfanalyzer" -profile test,docker --pairedEnd --dedupper 'dedup' --run_trim_bam --run_pmdtools --run_genotyping --genotyping_source 'trimmed' --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer
+ #- nextflow run ${TRAVIS_BUILD_DIR} -name "$RUN_NAME-pmd_trimbam_unifiedgenotyper_multivcfanalyzer" -profile test,docker --pairedEnd --dedupper 'dedup' --run_trim_bam --run_pmdtools --run_genotyping --genotyping_source 'trimmed' --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer
# GENOTYPING_UG/PMD/MULTIVCFANALYZER: Test running GATK UnifiedGenotyper and MultiVCFAnalyzer, additional VCFS
- - nextflow run ${TRAVIS_BUILD_DIR} -name "$RUN_NAME-multivcfanalyzer_additionalvcfs" -profile test,docker --pairedEnd --dedupper 'dedup' --run_genotyping --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer --additional_vcf_files 'https://raw.githubusercontent.com/jfy133/test-datasets/eager/testdata/Mammoth/vcf/JK2772_CATCAGTGAGTAGA_L008_R1_001.fastq.gz.tengrand.fq.combined.fq.mapped_rmdup.bam.unifiedgenotyper.vcf.gz' --write_allele_frequencies
+ #- nextflow run ${TRAVIS_BUILD_DIR} -name "$RUN_NAME-multivcfanalyzer_additionalvcfs" -profile test,docker --pairedEnd --dedupper 'dedup' --run_genotyping --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer --additional_vcf_files 'https://raw.githubusercontent.com/jfy133/test-datasets/eager/testdata/Mammoth/vcf/JK2772_CATCAGTGAGTAGA_L008_R1_001.fastq.gz.tengrand.fq.combined.fq.mapped_rmdup.bam.unifiedgenotyper.vcf.gz' --write_allele_frequencies
# VCF2GENOME: Test running GATK UnifiedGenotyper and run VCF2GENOME
- - nextflow run ${TRAVIS_BUILD_DIR} -name "$RUN_NAME-vcf2genome" -profile test,docker --pairedEnd --dedupper 'dedup' --run_genotyping --genotyping_tool 'ug' --genotyping_source 'raw' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_vcf2genome
+ #- nextflow run ${TRAVIS_BUILD_DIR} -name "$RUN_NAME-vcf2genome" -profile test,docker --pairedEnd --dedupper 'dedup' --run_genotyping --genotyping_tool 'ug' --genotyping_source 'raw' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_vcf2genome
# BAM_INPUT: Run the basic pipeline with the bam input profile, skip AdapterRemoval as no convertBam
- nextflow run ${TRAVIS_BUILD_DIR} -name "$RUN_NAME-baminput_noConvertBam" -profile test_bam,docker --bam --skip_adapterremoval --run_convertbam
# BAM_INPUT: Run the basic pipeline with the bam input profile, convert to FASTQ for adapterremoval test and downstream
diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py
index f03211b40..a6114aacb 100755
--- a/bin/scrape_software_versions.py
+++ b/bin/scrape_software_versions.py
@@ -15,7 +15,7 @@
'BWA': ['v_bwa.txt', r"Version: (\S+)"],
'Qualimap': ['v_qualimap.txt', r"QualiMap v.(\S+)"],
'GATK HaplotypeCaller': ['v_gatk.txt', r" v(\S+)"],
- 'GATK UnifiedGenotyper': ['v_gatk3_5.txt', r"version (\S+)"],
+ #'GATK UnifiedGenotyper': ['v_gatk3_5.txt', r"version (\S+)"],
'bamUtil' : ['v_bamutil.txt', r"Version: (\S+);"],
'fastP': ['v_fastp.txt', r"([\d\.]+)"],
'DamageProfiler' : ['v_damageprofiler.txt', r"DamageProfiler v(\S+)"],
@@ -47,7 +47,7 @@
results['Qualimap'] = 'N/A'
results['Preseq'] = 'N/A'
results['GATK HaplotypeCaller'] = 'N/A'
-results['GATK UnifiedGenotyper'] = 'N/A'
+#results['GATK UnifiedGenotyper'] = 'N/A'
results['freebayes'] = 'N/A'
results['VCF2genome'] = 'N/A'
results['MTNucRatioCalculator'] = 'N/A'
diff --git a/docs/usage.md b/docs/usage.md
index 7e5103464..389d54133 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -713,12 +713,18 @@ Turns on genotyping to run on all post-dedup and downstream BAMs. For example if
Specifies which genotyper to use. Current options are GATK (v3.5) UnifiedGenotyper or GATK (v4.xx). Furthermore, the FreeBayes Caller is available. Specify `'freebayes'`, `'hc'` or `'ug'` respectively.
-> NB that while UnifiedGenotyper is more suitable for low-coverage ancient DNA (HaplotypeCaller does _de novo_ assembly around each variant site), it is officially deperecated by the Broad Institute and is only accessible by an archived version not properly avaliable on `conda`. Therefore specifying 'ug' will download the GATK 3.5 `-jar` for you. This option therefore cannot be used when running the pipeline offline.
+> NB that while UnifiedGenotyper is more suitable for low-coverage ancient DNA (HaplotypeCaller does _de novo_ assembly around each variant site), it is officially deprecated by the Broad Institute and is only accessible via an archived version not properly available on `conda`. Therefore, if specifying 'ug', you will need to supply a GATK 3.5 `.jar` file to the parameter `--gatk_ug_jar`. Note that this means the pipeline is not fully reproducible in this configuration, unless you personally supply the `.jar` file.
#### `--genotyping_source`
Indicates which BAM file to use for genotyping, depending on what BAM processing modules you have turned on. Options are: `'raw'` for mapped only, filtered, or DeDup BAMs (with priority right to left); `'trimmed'` (for base clipped BAMs); `'pmd'` (for pmdtools output). Default is: `'raw'`.
+#### `--gatk_ug_jar`
+
+Specify a path to a local copy of a GATK 3.5 `.jar` file, preferably version '3.5-0-g36282e4'. The download location of this may be available from the GATK forums of the Broad Institute.
+
+> You must manually report your version of GATK 3.5 in publications/MultiQC as it is not included in our container.
+
#### `--gatk_call_conf`
If selected a GATK genotyper phred-scaled confidence threshold of a given SNP/INDEL call. Default: 30
diff --git a/main.nf b/main.nf
index 0530d2203..4adbd312e 100644
--- a/main.nf
+++ b/main.nf
@@ -124,8 +124,9 @@ def helpMessage() {
Genotyping
--run_genotyping Perform genotyping on deduplicated BAMs.
- --genotyping_tool Specify which genotyper to use either GATK UnifiedGenotyper, GATK HaplotypeCaller or Freebayes. Note: UnifiedGenotyper uses now deprecated GATK 3.5 and requires internet access. Options: 'ug', 'hc', 'freebayes'
+ --genotyping_tool Specify which genotyper to use either GATK UnifiedGenotyper, GATK HaplotypeCaller or Freebayes. Note: UnifiedGenotyper requires user-supplied defined GATK 3.5 jar file. Options: 'ug', 'hc', 'freebayes'
--genotyping_source Specify which input BAM to use for genotyping. Options: 'raw', 'trimmed' or 'pmd' Default: 'raw'
+ --gatk_ug_jar When specifying to use GATK UnifiedGenotyper, path to GATK 3.5 .jar.
--gatk_call_conf Specify GATK phred-scaled confidence threshold. Default: 30.
--gatk_ploidy Specify GATK organism ploidy. Default: 2.
--gatk_dbsnp Specify VCF file for output VCF SNP annotation (Optional). Gzip not accepted.
@@ -360,6 +361,10 @@ if (params.run_genotyping){
if (params.genotyping_tool != 'ug' && params.genotyping_tool != 'hc' && params.genotyping_tool != 'freebayes') {
exit 1, "Please specify a genotyper. Options: 'ug', 'hc', 'freebayes'. You gave: ${params.genotyping_tool}!"
}
+
+ if (params.genotyping_tool == 'ug' && params.gatk_ug_jar == '') {
+ exit 1, "Please specify path to a GATK 3.5 .jar file with --gatk_ug_jar."
+ }
if (params.gatk_ug_out_mode != 'EMIT_VARIANTS_ONLY' && params.gatk_ug_out_mode != 'EMIT_ALL_CONFIDENT_SITES' && params.gatk_ug_out_mode != 'EMIT_ALL_SITES') {
exit 1, "Please check your GATK output mode. Options are: 'EMIT_VARIANTS_ONLY', 'EMIT_ALL_CONFIDENT_SITES', 'EMIT_ALL_SITES'. You gave: ${params.gatk_out_mode}!"
@@ -1643,75 +1648,57 @@ process bam_trim {
"""
}
-
if ( params.run_genotyping && params.genotyping_source == 'raw' ) {
ch_rmdup_for_skipdamagemanipulation.mix(ch_output_from_pmdtools,ch_output_from_bamutils)
.into { ch_damagemanipulation_for_skipgenotyping; ch_damagemanipulation_for_genotyping_ug; ch_damagemanipulation_for_genotyping_hc; ch_damagemanipulation_for_genotyping_freebayes }
-
ch_rmdupindex_for_skipdamagemanipulation.mix(ch_outputindex_from_pmdtools,ch_outputindex_from_bamutils)
.into { ch_damagemanipulationindex_for_skipgenotyping; ch_damagemanipulationindex_for_genotyping_hc; ch_damagemanipulationindex_for_genotyping_freebayes }
-
} else if ( params.run_genotyping && params.genotyping_source == "trimmed" ) {
ch_rmdup_for_skipdamagemanipulation.mix(ch_output_from_pmdtools,ch_output_from_bamutils)
.filter { it =~/.*trimmed.bam/ }
.into { ch_damagemanipulation_for_skipgenotyping; ch_damagemanipulation_for_genotyping_ug; ch_damagemanipulation_for_genotyping_hc; ch_damagemanipulation_for_genotyping_freebayes }
-
ch_rmdupindex_for_skipdamagemanipulation.mix(ch_outputindex_from_pmdtools,ch_outputindex_from_bamutils)
.filter { it =~/.*trimmed.bam.bai|.*.trimmed.bam.csi/ }
.into { ch_damagemanipulationindex_for_skipgenotyping; ch_damagemanipulationindex_for_genotyping_hc; ch_damagemanipulationindex_for_genotyping_freebayes }
-
} else if ( params.run_genotyping && params.genotyping_source == "pmd" ) {
ch_rmdup_for_skipdamagemanipulation.mix(ch_output_from_pmdtools,ch_output_from_bamutils)
.filter { it =~/.*pmd.bam/ }
.into { ch_damagemanipulation_for_skipgenotyping; ch_damagemanipulation_for_genotyping_ug; ch_damagemanipulation_for_genotyping_hc; ch_damagemanipulation_for_genotyping_freebayes }
-
ch_rmdupindex_for_skipdamagemanipulation.mix(ch_outputindex_from_pmdtools,ch_outputindex_from_bamutils)
.filter { it =~/.*pmd.bam.bai|.*.pmd.bam.csi/ }
.into { ch_damagemanipulationindex_for_skipgenotyping; ch_damagemanipulationindex_for_genotyping_hc; ch_damagemanipulationindex_for_genotyping_freebayes }
-
} else if ( !params.run_genotyping && !params.run_trim_bam && !params.run_pmdtools ) {
ch_rmdup_for_skipdamagemanipulation
.into { ch_damagemanipulation_for_skipgenotyping; ch_damagemanipulation_for_genotyping_ug; ch_damagemanipulation_for_genotyping_hc; ch_damagemanipulation_for_genotyping_freebayes }
-
ch_rmdupindex_for_skipdamagemanipulation
- .into { ch_damagemanipulationindex_for_skipgenotyping; ch_damagemanipulationindex_for_genotyping_hc; ch_damagemanipulationindex_for_genotyping_freebayes }
+ .into { ch_damagemanipulationindex_for_skipgenotyping; ch_damagemanipulationindex_for_genotyping_hc; ch_damagemanipulationindex_for_genotyping_freebayes }
} else if ( !params.run_genotyping && !params.run_trim_bam && params.run_pmdtools ) {
ch_rmdup_for_skipdamagemanipulation
.into { ch_damagemanipulation_for_skipgenotyping; ch_damagemanipulation_for_genotyping_ug; ch_damagemanipulation_for_genotyping_hc; ch_damagemanipulation_for_genotyping_freebayes }
-
ch_rmdupindex_for_skipdamagemanipulation
- .into { ch_damagemanipulationindex_for_skipgenotyping; ch_damagemanipulationindex_for_genotyping_hc; ch_damagemanipulationindex_for_genotyping_freebayes }
+ .into { ch_damagemanipulationindex_for_skipgenotyping; ch_damagemanipulationindex_for_genotyping_hc; ch_damagemanipulationindex_for_genotyping_freebayes }
+} else if ( !params.run_genotyping && params.run_trim_bam && !params.run_pmdtools ) {
+ ch_rmdup_for_skipdamagemanipulation
+ .into { ch_damagemanipulation_for_skipgenotyping; ch_damagemanipulation_for_genotyping_ug; ch_damagemanipulation_for_genotyping_hc; ch_damagemanipulation_for_genotyping_freebayes }
+ ch_rmdupindex_for_skipdamagemanipulation
+ .into { ch_damagemanipulationindex_for_skipgenotyping; ch_damagemanipulationindex_for_genotyping_hc; ch_damagemanipulationindex_for_genotyping_freebayes }
}
-
/*
- Step 12a: Genotyping - UnifiedGenotyper Downloading
- NB: GATK 3.5 is the last release with VCF output in "old" VCF format, not breaking downstream tools. Therefore we need it (for now at least until downstream tools can read proper 4.2 VCFs... )
-
+ Step 12b: Genotyping - UG
+ NB: GATK 3.5 is the last release with VCF output in the "old" VCF format, which does not break MultiVCFAnalyzer (MVA). Therefore we need it (at least until downstream tools can read proper 4.2 VCFs).
*/
-ch_gatk_download = Channel.value("download")
-
- process download_gatk_v3_5 {
- label 'sc_tiny'
- when: params.run_genotyping && params.genotyping_tool == 'ug'
-
- input:
- val "download" from ch_gatk_download
-
- output:
- file "*.jar" into ch_unifiedgenotyper_jar,ch_unifiedgenotyper_versions_jar
-
- """
- wget -O GenomeAnalysisTK-3.5-0-g36282e4.tar.bz2 --referer https://software.broadinstitute.org/ 'https://software.broadinstitute.org/gatk/download/auth?package=GATK-archive&version=3.5-0-g36282e4'
- tar xjf GenomeAnalysisTK-3.5-0-g36282e4.tar.bz2
- """
-
- }
+if ( params.gatk_ug_jar != '' ) { // a GATK 3.5 jar path was given via --gatk_ug_jar: emit it on ch_unifiedgenotyper_jar
+ Channel
+ .fromPath( params.gatk_ug_jar )
+ .set{ ch_unifiedgenotyper_jar }
+} else { // no jar path given: still define ch_unifiedgenotyper_jar, but as an empty channel
+ Channel
+ .empty()
+ .set{ ch_unifiedgenotyper_jar }
+}
-/*
- Step 12b: Genotyping - UG
-*/
process genotyping_ug {
label 'mc_small'
@@ -2190,9 +2177,6 @@ process get_software_versions {
mtnucratio --help &> v_mtnucratiocalculator.txt || true
sexdeterrmine --version &> v_sexdeterrmine.txt || true
- ## Hardcoded as no --version flag or equivalent
- echo 'version 3.5-0-g36282e4' > v_gatk3_5.txt
-
scrape_software_versions.py &> software_versions_mqc.yaml
"""
}
diff --git a/nextflow.config b/nextflow.config
index fb5879ce7..87c8635f1 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -115,6 +115,7 @@ params {
run_genotyping = false
genotyping_tool = ''
genotyping_source = "raw"
+ gatk_ug_jar = ''
gatk_ug_genotype_model = 'SNP'
gatk_hc_emitrefconf = 'GVCF'
gatk_call_conf = '30'