Skip to content

Commit 91b5e38

Browse files
authored
Merge pull request #332 from jfy133/gatkug_fix
GATK UnifiedGenotyper Fix due to Broad Website change
2 parents ff67599 + 44acd9b commit 91b5e38

6 files changed

Lines changed: 39 additions & 43 deletions

File tree

.github/workflows/nf-core_eager.yml

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -86,15 +86,15 @@ jobs:
8686
- name: SKIPPING Test checking all skip steps work i.e. input bam, skipping straight to genotyping
8787
run: |
8888
nextflow run ${GITHUB_WORKSPACE} "$TOWER" -name "$RUN_NAME-skipping_logic" -profile test_bam,docker --bam --singleEnd --skip_fastqc --skip_adapterremoval --skip_mapping --skip_deduplication --skip_qualimap --skip_preseq --skip_damage_calculation --run_genotyping --genotyping_tool 'freebayes'
89-
- name: TRIM_BAM/PMD/GENOTYPING_UG/MULTIVCFANALYZER Test running PMDTools, TrimBam, GATK UnifiedGenotyper and MultiVCFAnalyzer
90-
run: |
91-
nextflow run ${GITHUB_WORKSPACE} "$TOWER" -name "$RUN_NAME-pmd_trimbam_gatkUG_MVA" -profile test,docker --pairedEnd --dedupper 'dedup' --run_trim_bam --run_pmdtools --run_genotyping --genotyping_source 'trimmed' --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer
92-
- name: GENOTYPING_UG/PMD/MULTIVCFANALYZER Test running GATK UnifiedGenotyper and MultiVCFAnalyzer, additional VCFS
93-
run: |
94-
nextflow run ${GITHUB_WORKSPACE} "$TOWER" -name "$RUN_NAME-MVA_additionalvcfs" -profile test,docker --pairedEnd --dedupper 'dedup' --run_genotyping --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer --additional_vcf_files 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/testdata/Mammoth/vcf/JK2772_CATCAGTGAGTAGA_L008_R1_001.fastq.gz.tengrand.fq.combined.fq.mapped_rmdup.bam.unifiedgenotyper.vcf.gz' --write_allele_frequencies
95-
- name: VCF2Genome Run basic pipeline with GATK unifiedgenotyper and run VCF2Genome
96-
run: |
97-
nextflow run ${GITHUB_WORKSPACE} "$TOWER" -name "$RUN_NAME-vcf2genome" -profile test,docker --pairedEnd --dedupper 'dedup' --run_genotyping --genotyping_tool 'ug' --genotyping_source 'raw' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_vcf2genome
89+
#- name: TRIM_BAM/PMD/GENOTYPING_UG/MULTIVCFANALYZER Test running PMDTools, TrimBam, GATK UnifiedGenotyper and MultiVCFAnalyzer
90+
# run: |
91+
# nextflow run ${GITHUB_WORKSPACE} "$TOWER" -name "$RUN_NAME-pmd_trimbam_gatkUG_MVA" -profile test,docker --pairedEnd --dedupper 'dedup' --run_trim_bam --run_pmdtools --run_genotyping --genotyping_source 'trimmed' --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer
92+
#- name: GENOTYPING_UG/PMD/MULTIVCFANALYZER Test running GATK UnifiedGenotyper and MultiVCFAnalyzer, additional VCFS
93+
# run: |
94+
# nextflow run ${GITHUB_WORKSPACE} "$TOWER" -name "$RUN_NAME-MVA_additionalvcfs" -profile test,docker --pairedEnd --dedupper 'dedup' --run_genotyping --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer --additional_vcf_files 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/testdata/Mammoth/vcf/JK2772_CATCAGTGAGTAGA_L008_R1_001.fastq.gz.tengrand.fq.combined.fq.mapped_rmdup.bam.unifiedgenotyper.vcf.gz' --write_allele_frequencies
95+
#- name: VCF2Genome Run basic pipeline with GATK unifiedgenotyper and run VCF2Genome
96+
# run: |
97+
# nextflow run ${GITHUB_WORKSPACE} "$TOWER" -name "$RUN_NAME-vcf2genome" -profile test,docker --pairedEnd --dedupper 'dedup' --run_genotyping --genotyping_tool 'ug' --genotyping_source 'raw' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_vcf2genome
9898
- name: BAM_INPUT Run the basic pipeline with the bam input profile, skip AdapterRemoval as no convertBam
9999
run: |
100100
nextflow run ${GITHUB_WORKSPACE} "$TOWER" -name "$RUN_NAME-baminput_noConvertBam" -profile test_bam,docker --bam --skip_adapterremoval --run_convertbam

.travis.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -78,11 +78,11 @@ script:
7878
# SKIPPING: Test checking all skip steps work i.e. input bam, skipping straight to genotyping
7979
- nextflow run ${TRAVIS_BUILD_DIR} -name "$RUN_NAME-skipping_logic" -profile test_bam,docker --bam --singleEnd --skip_fastqc --skip_adapterremoval --skip_mapping --skip_deduplication --skip_qualimap --skip_preseq --skip_damage_calculation --run_genotyping --genotyping_tool 'freebayes'
8080
# TRIM_BAM/PMD/GENOTYPING_UG/MULTIVCFANALYZER: Test running PMDTools, TrimBam, GATK UnifiedGenotyper and MultiVCFAnalyzer
81-
- nextflow run ${TRAVIS_BUILD_DIR} -name "$RUN_NAME-pmd_trimbam_unifiedgenotyper_multivcfanalyzer" -profile test,docker --pairedEnd --dedupper 'dedup' --run_trim_bam --run_pmdtools --run_genotyping --genotyping_source 'trimmed' --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer
81+
#- nextflow run ${TRAVIS_BUILD_DIR} -name "$RUN_NAME-pmd_trimbam_unifiedgenotyper_multivcfanalyzer" -profile test,docker --pairedEnd --dedupper 'dedup' --run_trim_bam --run_pmdtools --run_genotyping --genotyping_source 'trimmed' --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer
8282
# GENOTYPING_UG/PMD/MULTIVCFANALYZER: Test running GATK UnifiedGenotyper and MultiVCFAnalyzer, additional VCFS
83-
- nextflow run ${TRAVIS_BUILD_DIR} -name "$RUN_NAME-multivcfanalyzer_additionalvcfs" -profile test,docker --pairedEnd --dedupper 'dedup' --run_genotyping --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer --additional_vcf_files 'https://raw.githubusercontent.com/jfy133/test-datasets/eager/testdata/Mammoth/vcf/JK2772_CATCAGTGAGTAGA_L008_R1_001.fastq.gz.tengrand.fq.combined.fq.mapped_rmdup.bam.unifiedgenotyper.vcf.gz' --write_allele_frequencies
83+
#- nextflow run ${TRAVIS_BUILD_DIR} -name "$RUN_NAME-multivcfanalyzer_additionalvcfs" -profile test,docker --pairedEnd --dedupper 'dedup' --run_genotyping --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer --additional_vcf_files 'https://raw.githubusercontent.com/jfy133/test-datasets/eager/testdata/Mammoth/vcf/JK2772_CATCAGTGAGTAGA_L008_R1_001.fastq.gz.tengrand.fq.combined.fq.mapped_rmdup.bam.unifiedgenotyper.vcf.gz' --write_allele_frequencies
8484
# VCF2GENOME: Test running GATK UnifiedGenotyper and run VCF2GENOME
85-
- nextflow run ${TRAVIS_BUILD_DIR} -name "$RUN_NAME-vcf2genome" -profile test,docker --pairedEnd --dedupper 'dedup' --run_genotyping --genotyping_tool 'ug' --genotyping_source 'raw' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_vcf2genome
85+
#- nextflow run ${TRAVIS_BUILD_DIR} -name "$RUN_NAME-vcf2genome" -profile test,docker --pairedEnd --dedupper 'dedup' --run_genotyping --genotyping_tool 'ug' --genotyping_source 'raw' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_vcf2genome
8686
# BAM_INPUT: Run the basic pipeline with the bam input profile, skip AdapterRemoval as no convertBam
8787
- nextflow run ${TRAVIS_BUILD_DIR} -name "$RUN_NAME-baminput_noConvertBam" -profile test_bam,docker --bam --skip_adapterremoval --run_convertbam
8888
# BAM_INPUT: Run the basic pipeline with the bam input profile, convert to FASTQ for adapterremoval test and downstream

bin/scrape_software_versions.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
'BWA': ['v_bwa.txt', r"Version: (\S+)"],
1616
'Qualimap': ['v_qualimap.txt', r"QualiMap v.(\S+)"],
1717
'GATK HaplotypeCaller': ['v_gatk.txt', r" v(\S+)"],
18-
'GATK UnifiedGenotyper': ['v_gatk3_5.txt', r"version (\S+)"],
18+
#'GATK UnifiedGenotyper': ['v_gatk3_5.txt', r"version (\S+)"],
1919
'bamUtil' : ['v_bamutil.txt', r"Version: (\S+);"],
2020
'fastP': ['v_fastp.txt', r"([\d\.]+)"],
2121
'DamageProfiler' : ['v_damageprofiler.txt', r"DamageProfiler v(\S+)"],
@@ -47,7 +47,7 @@
4747
results['Qualimap'] = '<span style="color:#999999;\">N/A</span>'
4848
results['Preseq'] = '<span style="color:#999999;\">N/A</span>'
4949
results['GATK HaplotypeCaller'] = '<span style="color:#999999;\">N/A</span>'
50-
results['GATK UnifiedGenotyper'] = '<span style="color:#999999;\">N/A</span>'
50+
#results['GATK UnifiedGenotyper'] = '<span style="color:#999999;\">N/A</span>'
5151
results['freebayes'] = '<span style="color:#999999;\">N/A</span>'
5252
results['VCF2genome'] = '<span style="color:#999999;\">N/A</span>'
5353
results['MTNucRatioCalculator'] = '<span style="color:#999999;\">N/A</span>'

docs/usage.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -713,12 +713,18 @@ Turns on genotyping to run on all post-dedup and downstream BAMs. For example if
713713

714714
Specifies which genotyper to use. Current options are GATK (v3.5) UnifiedGenotyper, GATK (v4.xx) HaplotypeCaller, or the FreeBayes caller. Specify `'ug'`, `'hc'` or `'freebayes'` respectively.
715715

716-
> NB that while UnifiedGenotyper is more suitable for low-coverage ancient DNA (HaplotypeCaller does _de novo_ assembly around each variant site), it is officially deperecated by the Broad Institute and is only accessible by an archived version not properly avaliable on `conda`. Therefore specifying 'ug' will download the GATK 3.5 `-jar` for you. This option therefore cannot be used when running the pipeline offline.
716+
> NB that while UnifiedGenotyper is more suitable for low-coverage ancient DNA (HaplotypeCaller does _de novo_ assembly around each variant site), it is officially deprecated by the Broad Institute and is only accessible via an archived version that is not properly available on `conda`. Therefore, if specifying `'ug'`, you will need to supply a GATK 3.5 `.jar` file to the parameter `--gatk_ug_jar`. Note that this means the pipeline is not fully reproducible in this configuration, unless you personally supply the `.jar` file.
717717
718718
#### `--genotyping_source`
719719

720720
Indicates which BAM file to use for genotyping, depending on what BAM processing modules you have turned on. Options are: `'raw'` for mapped only, filtered, or DeDup BAMs (with priority right to left); `'trimmed'` (for base clipped BAMs); `'pmd'` (for pmdtools output). Default is: `'raw'`.
721721

722+
#### `--gatk_ug_jar`
723+
724+
Specify a path to a local copy of a GATK 3.5 `.jar` file, preferably version '3.5-0-g36282e4'. The download location of this file may be available from the GATK forums of the Broad Institute.
725+
726+
> You must manually report your version of GATK 3.5 in publications/MultiQC as it is not included in our container.
727+
722728
#### `--gatk_call_conf`
723729

724730
If a GATK genotyper is selected, specifies the phred-scaled confidence threshold for a given SNP/INDEL call. Default: 30

main.nf

Lines changed: 17 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -124,8 +124,9 @@ def helpMessage() {
124124
125125
Genotyping
126126
--run_genotyping Perform genotyping on deduplicated BAMs.
127-
--genotyping_tool Specify which genotyper to use either GATK UnifiedGenotyper, GATK HaplotypeCaller or Freebayes. Note: UnifiedGenotyper uses now deprecated GATK 3.5 and requires internet access. Options: 'ug', 'hc', 'freebayes'
127+
--genotyping_tool Specify which genotyper to use either GATK UnifiedGenotyper, GATK HaplotypeCaller or Freebayes. Note: UnifiedGenotyper requires user-supplied defined GATK 3.5 jar file. Options: 'ug', 'hc', 'freebayes'
128128
--genotyping_source Specify which input BAM to use for genotyping. Options: 'raw', 'trimmed' or 'pmd' Default: 'raw'
129+
--gatk_ug_jar When specifying to use GATK UnifiedGenotyper, path to GATK 3.5 .jar.
129130
--gatk_call_conf Specify GATK phred-scaled confidence threshold. Default: 30.
130131
--gatk_ploidy Specify GATK organism ploidy. Default: 2.
131132
--gatk_dbsnp Specify VCF file for output VCF SNP annotation (Optional). Gzip not accepted.
@@ -360,6 +361,10 @@ if (params.run_genotyping){
360361
if (params.genotyping_tool != 'ug' && params.genotyping_tool != 'hc' && params.genotyping_tool != 'freebayes') {
361362
exit 1, "Please specify a genotyper. Options: 'ug', 'hc', 'freebayes'. You gave: ${params.genotyping_tool}!"
362363
}
364+
365+
if (params.genotyping_tool == 'ug' && params.gatk_ug_jar == '') {
366+
exit 1, "Please specify path to a GATK 3.5 .jar file with --gatk_ug_jar."
367+
}
363368

364369
if (params.gatk_ug_out_mode != 'EMIT_VARIANTS_ONLY' && params.gatk_ug_out_mode != 'EMIT_ALL_CONFIDENT_SITES' && params.gatk_ug_out_mode != 'EMIT_ALL_SITES') {
365370
exit 1, "Please check your GATK output mode. Options are: 'EMIT_VARIANTS_ONLY', 'EMIT_ALL_CONFIDENT_SITES', 'EMIT_ALL_SITES'. You gave: ${params.gatk_out_mode}!"
@@ -1685,33 +1690,20 @@ if ( params.run_genotyping && params.genotyping_source == 'raw' ) {
16851690

16861691

16871692
/*
1688-
Step 12a: Genotyping - UnifiedGenotyper Downloading
1689-
NB: GATK 3.5 is the last release with VCF output in "old" VCF format, not breaking downstream tools. Therefore we need it (for now at least until downstream tools can read proper 4.2 VCFs... )
1690-
1693+
Step 12b: Genotyping - UG
1694+
NB: GATK 3.5 is the last release with VCF output in "old" VCF format, not breaking MVA. Therefore we need it (for now at least until downstream tools can read proper 4.2 VCFs... )
16911695
*/
16921696

1693-
ch_gatk_download = Channel.value("download")
1694-
1695-
process download_gatk_v3_5 {
1696-
label 'sc_tiny'
1697-
when: params.run_genotyping && params.genotyping_tool == 'ug'
1698-
1699-
input:
1700-
val "download" from ch_gatk_download
1701-
1702-
output:
1703-
file "*.jar" into ch_unifiedgenotyper_jar,ch_unifiedgenotyper_versions_jar
1704-
1705-
"""
1706-
wget -O GenomeAnalysisTK-3.5-0-g36282e4.tar.bz2 --referer https://software.broadinstitute.org/ 'https://software.broadinstitute.org/gatk/download/auth?package=GATK-archive&version=3.5-0-g36282e4'
1707-
tar xjf GenomeAnalysisTK-3.5-0-g36282e4.tar.bz2
1708-
"""
1709-
1710-
}
1697+
if ( params.gatk_ug_jar != '' ) {
1698+
Channel
1699+
.fromPath( params.gatk_ug_jar )
1700+
.set{ ch_unifiedgenotyper_jar }
1701+
} else {
1702+
Channel
1703+
.empty()
1704+
.set{ ch_unifiedgenotyper_jar }
1705+
}
17111706

1712-
/*
1713-
Step 12b: Genotyping - UG
1714-
*/
17151707

17161708
process genotyping_ug {
17171709
label 'mc_small'
@@ -2190,9 +2182,6 @@ process get_software_versions {
21902182
mtnucratio --help &> v_mtnucratiocalculator.txt || true
21912183
sexdeterrmine --version &> v_sexdeterrmine.txt || true
21922184
2193-
## Hardcoded as no --version flag or equivalent
2194-
echo 'version 3.5-0-g36282e4' > v_gatk3_5.txt
2195-
21962185
scrape_software_versions.py &> software_versions_mqc.yaml
21972186
"""
21982187
}

nextflow.config

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ params {
115115
run_genotyping = false
116116
genotyping_tool = ''
117117
genotyping_source = "raw"
118+
gatk_ug_jar = ''
118119
gatk_ug_genotype_model = 'SNP'
119120
gatk_hc_emitrefconf = 'GVCF'
120121
gatk_call_conf = '30'

0 commit comments

Comments
 (0)