Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions .github/workflows/nf-core_eager.yml
Original file line number Diff line number Diff line change
Expand Up @@ -86,15 +86,15 @@ jobs:
- name: SKIPPING Test checking all skip steps work i.e. input bam, skipping straight to genotyping
run: |
nextflow run ${GITHUB_WORKSPACE} "$TOWER" -name "$RUN_NAME-skipping_logic" -profile test_bam,docker --bam --singleEnd --skip_fastqc --skip_adapterremoval --skip_mapping --skip_deduplication --skip_qualimap --skip_preseq --skip_damage_calculation --run_genotyping --genotyping_tool 'freebayes'
- name: TRIM_BAM/PMD/GENOTYPING_UG/MULTIVCFANALYZER Test running PMDTools, TrimBam, GATK UnifiedGenotyper and MultiVCFAnalyzer
run: |
nextflow run ${GITHUB_WORKSPACE} "$TOWER" -name "$RUN_NAME-pmd_trimbam_gatkUG_MVA" -profile test,docker --pairedEnd --dedupper 'dedup' --run_trim_bam --run_pmdtools --run_genotyping --genotyping_source 'trimmed' --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer
- name: GENOTYPING_UG/PMD/MULTIVCFANALYZER Test running GATK UnifiedGenotyper and MultiVCFAnalyzer, additional VCFS
run: |
nextflow run ${GITHUB_WORKSPACE} "$TOWER" -name "$RUN_NAME-MVA_additionalvcfs" -profile test,docker --pairedEnd --dedupper 'dedup' --run_genotyping --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer --additional_vcf_files 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/testdata/Mammoth/vcf/JK2772_CATCAGTGAGTAGA_L008_R1_001.fastq.gz.tengrand.fq.combined.fq.mapped_rmdup.bam.unifiedgenotyper.vcf.gz' --write_allele_frequencies
- name: VCF2Genome Run basic pipeline with GATK unifiedgenotyper and run VCF2Genome
run: |
nextflow run ${GITHUB_WORKSPACE} "$TOWER" -name "$RUN_NAME-vcf2genome" -profile test,docker --pairedEnd --dedupper 'dedup' --run_genotyping --genotyping_tool 'ug' --genotyping_source 'raw' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_vcf2genome
#- name: TRIM_BAM/PMD/GENOTYPING_UG/MULTIVCFANALYZER Test running PMDTools, TrimBam, GATK UnifiedGenotyper and MultiVCFAnalyzer
# run: |
# nextflow run ${GITHUB_WORKSPACE} "$TOWER" -name "$RUN_NAME-pmd_trimbam_gatkUG_MVA" -profile test,docker --pairedEnd --dedupper 'dedup' --run_trim_bam --run_pmdtools --run_genotyping --genotyping_source 'trimmed' --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer
#- name: GENOTYPING_UG/PMD/MULTIVCFANALYZER Test running GATK UnifiedGenotyper and MultiVCFAnalyzer, additional VCFS
# run: |
# nextflow run ${GITHUB_WORKSPACE} "$TOWER" -name "$RUN_NAME-MVA_additionalvcfs" -profile test,docker --pairedEnd --dedupper 'dedup' --run_genotyping --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer --additional_vcf_files 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/testdata/Mammoth/vcf/JK2772_CATCAGTGAGTAGA_L008_R1_001.fastq.gz.tengrand.fq.combined.fq.mapped_rmdup.bam.unifiedgenotyper.vcf.gz' --write_allele_frequencies
#- name: VCF2Genome Run basic pipeline with GATK unifiedgenotyper and run VCF2Genome
# run: |
# nextflow run ${GITHUB_WORKSPACE} "$TOWER" -name "$RUN_NAME-vcf2genome" -profile test,docker --pairedEnd --dedupper 'dedup' --run_genotyping --genotyping_tool 'ug' --genotyping_source 'raw' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_vcf2genome
- name: BAM_INPUT Run the basic pipeline with the bam input profile, skip AdapterRemoval as no convertBam
run: |
nextflow run ${GITHUB_WORKSPACE} "$TOWER" -name "$RUN_NAME-baminput_noConvertBam" -profile test_bam,docker --bam --skip_adapterremoval --run_convertbam
Expand Down
6 changes: 3 additions & 3 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,11 @@ script:
# SKIPPING: Test checking all skip steps work i.e. input bam, skipping straight to genotyping
- nextflow run ${TRAVIS_BUILD_DIR} -name "$RUN_NAME-skipping_logic" -profile test_bam,docker --bam --singleEnd --skip_fastqc --skip_adapterremoval --skip_mapping --skip_deduplication --skip_qualimap --skip_preseq --skip_damage_calculation --run_genotyping --genotyping_tool 'freebayes'
# TRIM_BAM/PMD/GENOTYPING_UG/MULTIVCFANALYZER: Test running PMDTools, TrimBam, GATK UnifiedGenotyper and MultiVCFAnalyzer
- nextflow run ${TRAVIS_BUILD_DIR} -name "$RUN_NAME-pmd_trimbam_unifiedgenotyper_multivcfanalyzer" -profile test,docker --pairedEnd --dedupper 'dedup' --run_trim_bam --run_pmdtools --run_genotyping --genotyping_source 'trimmed' --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer
#- nextflow run ${TRAVIS_BUILD_DIR} -name "$RUN_NAME-pmd_trimbam_unifiedgenotyper_multivcfanalyzer" -profile test,docker --pairedEnd --dedupper 'dedup' --run_trim_bam --run_pmdtools --run_genotyping --genotyping_source 'trimmed' --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer
# GENOTYPING_UG/PMD/MULTIVCFANALYZER: Test running GATK UnifiedGenotyper and MultiVCFAnalyzer, additional VCFS
- nextflow run ${TRAVIS_BUILD_DIR} -name "$RUN_NAME-multivcfanalyzer_additionalvcfs" -profile test,docker --pairedEnd --dedupper 'dedup' --run_genotyping --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer --additional_vcf_files 'https://raw.githubusercontent.com/jfy133/test-datasets/eager/testdata/Mammoth/vcf/JK2772_CATCAGTGAGTAGA_L008_R1_001.fastq.gz.tengrand.fq.combined.fq.mapped_rmdup.bam.unifiedgenotyper.vcf.gz' --write_allele_frequencies
#- nextflow run ${TRAVIS_BUILD_DIR} -name "$RUN_NAME-multivcfanalyzer_additionalvcfs" -profile test,docker --pairedEnd --dedupper 'dedup' --run_genotyping --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer --additional_vcf_files 'https://raw.githubusercontent.com/jfy133/test-datasets/eager/testdata/Mammoth/vcf/JK2772_CATCAGTGAGTAGA_L008_R1_001.fastq.gz.tengrand.fq.combined.fq.mapped_rmdup.bam.unifiedgenotyper.vcf.gz' --write_allele_frequencies
# VCF2GENOME: Test running GATK UnifiedGenotyper and run VCF2GENOME
- nextflow run ${TRAVIS_BUILD_DIR} -name "$RUN_NAME-vcf2genome" -profile test,docker --pairedEnd --dedupper 'dedup' --run_genotyping --genotyping_tool 'ug' --genotyping_source 'raw' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_vcf2genome
#- nextflow run ${TRAVIS_BUILD_DIR} -name "$RUN_NAME-vcf2genome" -profile test,docker --pairedEnd --dedupper 'dedup' --run_genotyping --genotyping_tool 'ug' --genotyping_source 'raw' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_vcf2genome
# BAM_INPUT: Run the basic pipeline with the bam input profile, skip AdapterRemoval as no convertBam
- nextflow run ${TRAVIS_BUILD_DIR} -name "$RUN_NAME-baminput_noConvertBam" -profile test_bam,docker --bam --skip_adapterremoval --run_convertbam
# BAM_INPUT: Run the basic pipeline with the bam input profile, convert to FASTQ for adapterremoval test and downstream
Expand Down
4 changes: 2 additions & 2 deletions bin/scrape_software_versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
'BWA': ['v_bwa.txt', r"Version: (\S+)"],
'Qualimap': ['v_qualimap.txt', r"QualiMap v.(\S+)"],
'GATK HaplotypeCaller': ['v_gatk.txt', r" v(\S+)"],
'GATK UnifiedGenotyper': ['v_gatk3_5.txt', r"version (\S+)"],
#'GATK UnifiedGenotyper': ['v_gatk3_5.txt', r"version (\S+)"],
'bamUtil' : ['v_bamutil.txt', r"Version: (\S+);"],
'fastP': ['v_fastp.txt', r"([\d\.]+)"],
'DamageProfiler' : ['v_damageprofiler.txt', r"DamageProfiler v(\S+)"],
Expand Down Expand Up @@ -47,7 +47,7 @@
results['Qualimap'] = '<span style="color:#999999;\">N/A</span>'
results['Preseq'] = '<span style="color:#999999;\">N/A</span>'
results['GATK HaplotypeCaller'] = '<span style="color:#999999;\">N/A</span>'
results['GATK UnifiedGenotyper'] = '<span style="color:#999999;\">N/A</span>'
#results['GATK UnifiedGenotyper'] = '<span style="color:#999999;\">N/A</span>'
results['freebayes'] = '<span style="color:#999999;\">N/A</span>'
results['VCF2genome'] = '<span style="color:#999999;\">N/A</span>'
results['MTNucRatioCalculator'] = '<span style="color:#999999;\">N/A</span>'
Expand Down
8 changes: 7 additions & 1 deletion docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -713,12 +713,18 @@ Turns on genotyping to run on all post-dedup and downstream BAMs. For example if

Specifies which genotyper to use. Current options are GATK (v3.5) UnifiedGenotyper or GATK (v4.xx) HaplotypeCaller. Furthermore, the FreeBayes Caller is available. Specify `'ug'`, `'hc'` or `'freebayes'` respectively.

> NB that while UnifiedGenotyper is more suitable for low-coverage ancient DNA (HaplotypeCaller does _de novo_ assembly around each variant site), it is officially deprecated by the Broad Institute and is only accessible by an archived version not properly available on `conda`. Therefore specifying 'ug' will download the GATK 3.5 `-jar` for you. This option therefore cannot be used when running the pipeline offline.
> NB that while UnifiedGenotyper is more suitable for low-coverage ancient DNA (HaplotypeCaller does _de novo_ assembly around each variant site), it is officially deprecated by the Broad Institute and is only accessible by an archived version not properly available on `conda`. Therefore, if specifying 'ug', you will need to supply a GATK 3.5 `.jar` file to the parameter `--gatk_ug_jar`. Note that this means the pipeline is not fully reproducible in this configuration, unless you personally supply the `.jar` file.

#### `--genotyping_source`

Indicates which BAM file to use for genotyping, depending on what BAM processing modules you have turned on. Options are: `'raw'` for mapped only, filtered, or DeDup BAMs (with priority right to left); `'trimmed'` (for base clipped BAMs); `'pmd'` (for pmdtools output). Default is: `'raw'`.

#### `--gatk_ug_jar`

Specify a path to a local copy of a GATK 3.5 `.jar` file, preferably version '3.5-0-g36282e4'. The download location of this may be available from the GATK forums of the Broad Institute.

> You must manually report your version of GATK 3.5 in publications/MultiQC as it is not included in our container.

#### `--gatk_call_conf`

If a GATK genotyper is selected, specifies the phred-scaled confidence threshold for a given SNP/INDEL call. Default: 30
Expand Down
45 changes: 17 additions & 28 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,9 @@ def helpMessage() {

Genotyping
--run_genotyping Perform genotyping on deduplicated BAMs.
--genotyping_tool Specify which genotyper to use either GATK UnifiedGenotyper, GATK HaplotypeCaller or Freebayes. Note: UnifiedGenotyper uses now deprecated GATK 3.5 and requires internet access. Options: 'ug', 'hc', 'freebayes'
--genotyping_tool Specify which genotyper to use either GATK UnifiedGenotyper, GATK HaplotypeCaller or Freebayes. Note: UnifiedGenotyper requires user-supplied defined GATK 3.5 jar file. Options: 'ug', 'hc', 'freebayes'
--genotyping_source Specify which input BAM to use for genotyping. Options: 'raw', 'trimmed' or 'pmd' Default: 'raw'
--gatk_ug_jar When specifying to use GATK UnifiedGenotyper, path to GATK 3.5 .jar.
--gatk_call_conf Specify GATK phred-scaled confidence threshold. Default: 30.
--gatk_ploidy Specify GATK organism ploidy. Default: 2.
--gatk_dbsnp Specify VCF file for output VCF SNP annotation (Optional). Gzip not accepted.
Expand Down Expand Up @@ -360,6 +361,10 @@ if (params.run_genotyping){
if (params.genotyping_tool != 'ug' && params.genotyping_tool != 'hc' && params.genotyping_tool != 'freebayes') {
exit 1, "Please specify a genotyper. Options: 'ug', 'hc', 'freebayes'. You gave: ${params.genotyping_tool}!"
}

if (params.genotyping_tool == 'ug' && params.gatk_ug_jar == '') {
exit 1, "Please specify path to a GATK 3.5 .jar file with --gatk_ug_jar."
}

if (params.gatk_ug_out_mode != 'EMIT_VARIANTS_ONLY' && params.gatk_ug_out_mode != 'EMIT_ALL_CONFIDENT_SITES' && params.gatk_ug_out_mode != 'EMIT_ALL_SITES') {
exit 1, "Please check your GATK output mode. Options are: 'EMIT_VARIANTS_ONLY', 'EMIT_ALL_CONFIDENT_SITES', 'EMIT_ALL_SITES'. You gave: ${params.gatk_out_mode}!"
Expand Down Expand Up @@ -1685,33 +1690,20 @@ if ( params.run_genotyping && params.genotyping_source == 'raw' ) {


/*
Step 12a: Genotyping - UnifiedGenotyper Downloading
NB: GATK 3.5 is the last release with VCF output in "old" VCF format, not breaking downstream tools. Therefore we need it (for now at least until downstream tools can read proper 4.2 VCFs... )

Step 12b: Genotyping - UG
NB: GATK 3.5 is the last release with VCF output in "old" VCF format, not breaking MVA. Therefore we need it (for now at least until downstream tools can read proper 4.2 VCFs... )
*/

ch_gatk_download = Channel.value("download")

process download_gatk_v3_5 {
label 'sc_tiny'
when: params.run_genotyping && params.genotyping_tool == 'ug'

input:
val "download" from ch_gatk_download

output:
file "*.jar" into ch_unifiedgenotyper_jar,ch_unifiedgenotyper_versions_jar

"""
wget -O GenomeAnalysisTK-3.5-0-g36282e4.tar.bz2 --referer https://software.broadinstitute.org/ 'https://software.broadinstitute.org/gatk/download/auth?package=GATK-archive&version=3.5-0-g36282e4'
tar xjf GenomeAnalysisTK-3.5-0-g36282e4.tar.bz2
"""

}
// Build the channel that carries the user-supplied GATK 3.5 .jar for UnifiedGenotyper.
// params.gatk_ug_jar defaults to '' (empty string) when the user does not set it.
if ( params.gatk_ug_jar != '' ) {
// A path was supplied: emit it on ch_unifiedgenotyper_jar.
Channel
.fromPath( params.gatk_ug_jar )
.set{ ch_unifiedgenotyper_jar }
} else {
// No path supplied: still define ch_unifiedgenotyper_jar, but as an empty channel,
// so the channel name exists even when no jar is given.
Channel
.empty()
.set{ ch_unifiedgenotyper_jar }
}

/*
Step 12b: Genotyping - UG
*/

process genotyping_ug {
label 'mc_small'
Expand Down Expand Up @@ -2190,9 +2182,6 @@ process get_software_versions {
mtnucratio --help &> v_mtnucratiocalculator.txt || true
sexdeterrmine --version &> v_sexdeterrmine.txt || true

## Hardcoded as no --version flag or equivalent
echo 'version 3.5-0-g36282e4' > v_gatk3_5.txt

scrape_software_versions.py &> software_versions_mqc.yaml
"""
}
Expand Down
1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ params {
run_genotyping = false
genotyping_tool = ''
genotyping_source = "raw"
gatk_ug_jar = ''
gatk_ug_genotype_model = 'SNP'
gatk_hc_emitrefconf = 'GVCF'
gatk_call_conf = '30'
Expand Down