diff --git a/subworkflows/nf-core/gatk4/vqsr/main.nf b/subworkflows/nf-core/gatk4/vqsr/main.nf
new file mode 100644
index 0000000000..5d89d4d51b
--- /dev/null
+++ b/subworkflows/nf-core/gatk4/vqsr/main.nf
@@ -0,0 +1,105 @@
+//
+// Recalibrate with variantrecalibrator & applyvqsr.
+//
+
+// NOTE(review): this file lives four directories deep (subworkflows/nf-core/gatk4/vqsr/), so
+// '../../../' resolves to 'subworkflows/' - confirm these paths, '../../../../' may be intended.
+include { GATK4_VARIANTRECALIBRATOR as GATK4_VARIANTRECALIBRATOR_SNP   } from '../../../modules/gatk4/variantrecalibrator/main'
+include { GATK4_VARIANTRECALIBRATOR as GATK4_VARIANTRECALIBRATOR_INDEL } from '../../../modules/gatk4/variantrecalibrator/main'
+include { GATK4_APPLYVQSR           as GATK4_APPLYVQSR_SNP             } from '../../../modules/gatk4/applyvqsr/main'
+include { GATK4_APPLYVQSR           as GATK4_APPLYVQSR_INDEL           } from '../../../modules/gatk4/applyvqsr/main'
+include { GATK4_SELECTVARIANTS      as GATK4_SELECTVARIANTS_SNP        } from '../../../modules/gatk4/selectvariants/main'
+include { GATK4_SELECTVARIANTS      as GATK4_SELECTVARIANTS_INDEL      } from '../../../modules/gatk4/selectvariants/main'
+include { GATK4_SELECTVARIANTS      as GATK4_SELECTVARIANTS_NORECAL    } from '../../../modules/gatk4/selectvariants/main'
+include { GATK4_MERGEVCFS           as GATK4_MERGEVCFS_RECALIBRATED    } from '../../../modules/gatk4/mergevcfs/main'
+
+//
+// GATK_VQSR: three SelectVariants passes run over the same input (SNP, INDEL, NORECAL). The SNP and
+// INDEL passes each go through VariantRecalibrator and ApplyVQSR in the matching mode; the NORECAL
+// VCF and the two ApplyVQSR VCFs are then grouped by their first tuple element and merged with MergeVcfs.
+// NOTE(review): which variants each SelectVariants alias keeps is not set in this file -
+// presumably it comes from per-alias module configuration; confirm against the pipeline config.
+//
+workflow GATK_VQSR {
+    take:
+    input            // channel: [ val(meta), [ input ], [ input_index ], [] ]
+    fasta            // channel: /path/to/reference/fasta
+    fai              // channel: /path/to/reference/fasta/index
+    dict             // channel: /path/to/reference/fasta/dictionary
+    allelespecific   // channel: true/false run allelespecific mode of vqsr modules
+    resources_SNP    // channel: [[resource, vcfs, forvariantrecal], [resource, tbis, forvariantrecal], [resource, labels, forvariantrecal]]
+    resources_INDEL  // channel: [[resource, vcfs, forvariantrecal], [resource, tbis, forvariantrecal], [resource, labels, forvariantrecal]]
+    annotation_SNP   // channel: [annotations, to, use, for, variantrecal, filtering]
+    annotation_INDEL // channel: [annotations, to, use, for, variantrecal, filtering]
+    create_rscript   // channel: true/false whether to generate rscript plots in variantrecal
+    truthsensitivity // channel: 0-100.0 truthsensitivity cutoff for applyvqsr
+
+    main:
+    ch_versions           = Channel.empty()
+    ch_select_variants_in = input
+
+    // SNP branch: select -> build the recalibration model -> apply it.
+    GATK4_SELECTVARIANTS_SNP (ch_select_variants_in)
+    ch_versions = ch_versions.mix(GATK4_SELECTVARIANTS_SNP.out.versions)
+
+    // Pair each selected VCF with its index by matching on the first tuple element (meta).
+    ch_vrecal_snp_in = GATK4_SELECTVARIANTS_SNP.out.vcf.combine(GATK4_SELECTVARIANTS_SNP.out.tbi, by: 0)
+    GATK4_VARIANTRECALIBRATOR_SNP ( ch_vrecal_snp_in, fasta, fai, dict, allelespecific, resources_SNP, annotation_SNP, 'SNP', create_rscript )
+    ch_versions = ch_versions.mix(GATK4_VARIANTRECALIBRATOR_SNP.out.versions)
+
+    // Append the recal table, its index and the tranches file to the matching VCF/index tuple.
+    ch_snp_recal    = GATK4_VARIANTRECALIBRATOR_SNP.out.recal
+    ch_snp_idx      = GATK4_VARIANTRECALIBRATOR_SNP.out.idx
+    ch_snp_tranches = GATK4_VARIANTRECALIBRATOR_SNP.out.tranches
+    ch_snp_vqsr_in  = ch_vrecal_snp_in.combine(ch_snp_recal, by: 0).combine(ch_snp_idx, by: 0).combine(ch_snp_tranches, by: 0)
+    GATK4_APPLYVQSR_SNP ( ch_snp_vqsr_in, fasta, fai, dict, allelespecific, truthsensitivity, 'SNP' )
+    ch_versions = ch_versions.mix(GATK4_APPLYVQSR_SNP.out.versions)
+
+    // INDEL branch: same steps as the SNP branch, run in 'INDEL' mode.
+    GATK4_SELECTVARIANTS_INDEL (ch_select_variants_in)
+    ch_versions = ch_versions.mix(GATK4_SELECTVARIANTS_INDEL.out.versions)
+
+    ch_vrecal_indel_in = GATK4_SELECTVARIANTS_INDEL.out.vcf.combine(GATK4_SELECTVARIANTS_INDEL.out.tbi, by: 0)
+    GATK4_VARIANTRECALIBRATOR_INDEL ( ch_vrecal_indel_in, fasta, fai, dict, allelespecific, resources_INDEL, annotation_INDEL, 'INDEL', create_rscript )
+    ch_versions = ch_versions.mix(GATK4_VARIANTRECALIBRATOR_INDEL.out.versions)
+
+    ch_indel_recal    = GATK4_VARIANTRECALIBRATOR_INDEL.out.recal
+    ch_indel_idx      = GATK4_VARIANTRECALIBRATOR_INDEL.out.idx
+    ch_indel_tranches = GATK4_VARIANTRECALIBRATOR_INDEL.out.tranches
+    ch_indel_vqsr_in  = ch_vrecal_indel_in.combine(ch_indel_recal, by: 0).combine(ch_indel_idx, by: 0).combine(ch_indel_tranches, by: 0)
+    GATK4_APPLYVQSR_INDEL ( ch_indel_vqsr_in, fasta, fai, dict, allelespecific, truthsensitivity, 'INDEL' )
+    ch_versions = ch_versions.mix(GATK4_APPLYVQSR_INDEL.out.versions)
+
+    // NORECAL branch: this selection skips recalibration and feeds the merge directly.
+    GATK4_SELECTVARIANTS_NORECAL (ch_select_variants_in)
+    ch_versions = ch_versions.mix(GATK4_SELECTVARIANTS_NORECAL.out.versions)
+
+    // Gather the NORECAL, SNP and INDEL VCFs that share a first tuple element, then merge them.
+    ch_merge_recal_in = GATK4_SELECTVARIANTS_NORECAL.out.vcf.mix(GATK4_APPLYVQSR_SNP.out.vcf).mix(GATK4_APPLYVQSR_INDEL.out.vcf).groupTuple(by: 0)
+    GATK4_MERGEVCFS_RECALIBRATED(ch_merge_recal_in, dict, true)
+    ch_versions = ch_versions.mix(GATK4_MERGEVCFS_RECALIBRATED.out.versions)
+
+    emit:
+    versions               = ch_versions                                     // channel: [ versions.yml ]
+    select_var_snp_vcf     = GATK4_SELECTVARIANTS_SNP.out.vcf
+    select_var_snp_tbi     = GATK4_SELECTVARIANTS_SNP.out.tbi
+    recal_snp_file         = GATK4_VARIANTRECALIBRATOR_SNP.out.recal         // channel: [ val(meta), [ recal ] ]
+    recal_snp_index        = GATK4_VARIANTRECALIBRATOR_SNP.out.idx           // channel: [ val(meta), [ idx ] ]
+    recal_snp_tranches     = GATK4_VARIANTRECALIBRATOR_SNP.out.tranches      // channel: [ val(meta), [ tranches ] ]
+    vqsr_snp_vcf           = GATK4_APPLYVQSR_SNP.out.vcf                     // channel: [ val(meta), [ vcf ] ]
+    vqsr_snp_index         = GATK4_APPLYVQSR_SNP.out.tbi                     // channel: [ val(meta), [ tbi ] ]
+
+    select_var_indel_vcf   = GATK4_SELECTVARIANTS_INDEL.out.vcf
+    select_var_indel_tbi   = GATK4_SELECTVARIANTS_INDEL.out.tbi
+    recal_indel_file       = GATK4_VARIANTRECALIBRATOR_INDEL.out.recal       // channel: [ val(meta), [ recal ] ]
+    recal_indel_index      = GATK4_VARIANTRECALIBRATOR_INDEL.out.idx         // channel: [ val(meta), [ idx ] ]
+    recal_indel_tranches   = GATK4_VARIANTRECALIBRATOR_INDEL.out.tranches    // channel: [ val(meta), [ tranches ] ]
+    vqsr_indel_vcf         = GATK4_APPLYVQSR_INDEL.out.vcf                   // channel: [ val(meta), [ vcf ] ]
+    vqsr_indel_index       = GATK4_APPLYVQSR_INDEL.out.tbi                   // channel: [ val(meta), [ tbi ] ]
+
+    select_var_norecal_vcf = GATK4_SELECTVARIANTS_NORECAL.out.vcf
+    select_var_norecal_tbi = GATK4_SELECTVARIANTS_NORECAL.out.tbi
+
+    merged_recal_vcf       = GATK4_MERGEVCFS_RECALIBRATED.out.vcf
+    merged_recal_tbi       = GATK4_MERGEVCFS_RECALIBRATED.out.tbi
+}
diff --git a/subworkflows/nf-core/gatk4/vqsr/meta.yml b/subworkflows/nf-core/gatk4/vqsr/meta.yml
new file mode 100644
index 0000000000..2693b549fd
--- /dev/null
+++ b/subworkflows/nf-core/gatk4/vqsr/meta.yml
@@ -0,0 +1,153 @@
+name: gatk_vqsr
+description: |
+  Recalibrate joint vcf using step1 (VariantRecalibrator) and step2
(applyVQSR) of VQSR. +keywords: + - gatk4 + - variantrecalibrator + - applyvqsr + - selectvariants + - mergevcfs + - variant_calling + - joint_germline + - VQSR +modules: + - gatk4/mergevcfs + - gatk4/selectvariants + - gatk4/variantrecalibrator + - gatk4/applyvqsr +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - input: + type: list + description: | + Vcf file and its index file containing the joint germline to be recalibrated + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + - allelespecific: + type: boolean + description: run allelespecific mode of vqsr modules. + pattern: "{true,false}" + - resources_SNP: + type: val + description: | + resource vcfs, indexes and labels to be used as truth, training and known sites resources, + vcf list imports the files into the module, + resource_labels list contains file names which are passed to the command. + pattern: "[[resource, vcfs, forvariantrecal], [resource, tbis, forvariantrecal], [resource, labels, forvariantrecal]]" + - resources_INDEL: + type: val + description: | + resource vcfs, indexes and labels to be used as truth, training and known sites resources, + vcf list imports the files into the module, + resource_labels list contains file names which are passed to the command. + pattern: "[[resource, vcfs, forvariantrecal], [resource, tbis, forvariantrecal], [resource, labels, forvariantrecal]]" + - annotation_SNP: + type: list + description: list of annotations to use for variantrecal filtering SNP mode. + pattern: "['QD', 'FS', 'SOR']" + - annotation_INDEL: + type: list + description: list of annotations to use for variantrecal filtering INDEL mode. 
+ pattern: "['QD', 'FS', 'SOR']" + - create_rscript: + type: boolean + description: whether to generate rscript plots in variantrecal. + pattern: "{true,false}" + - truthsensitivity: + type: val + description: truthsensitivity cutoff for applyvqsr. + pattern: "0.0-100.0" + +output: + - versions: + type: file + description: File containing software versions + pattern: 'versions.yml' + - select_var_indel_vcf: + type: file + description: vcf file containing indels. + pattern: "*.vcf.gz" + - select_var_indel_tbi: + type: file + description: Index file for vcf containing indels. + pattern: "*.vcf.gz.tbi" + - select_var_snp_vcf: + type: file + description: vcf file containing snps. + pattern: "*.vcf.gz" + - select_var_snp_tbi: + type: file + description: Index file for vcf containing snps. + pattern: "*.vcf.gz.tbi" + - select_var_norecal_vcf: + type: file + description: vcf file containing other variants. + pattern: "*.vcf.gz" + - select_var_norecal_tbi: + type: file + description: Index file for vcf containing other variants. + pattern: "*.vcf.gz.tbi" + - recal_snp_file: + type: file + description: recalibration tables file from variantrecalibrator. + pattern: "*.recal" + - recal_snp_index: + type: file + description: Index file for the recalibration tables. + pattern: "*.idx" + - recal_snp_tranches: + type: file + description: Tranches file for the recalibration tables. + pattern: "*.tranches" + - vqsr_snp_vcf: + type: file + description: joint vcf file recalibrated using applyvqsr. + pattern: "*.vcf.gz" + - vqsr_snp_index: + type: file + description: Index file for recalibrated joint vcf. + pattern: "*.vcf.gz.tbi" + - recal_indel_file: + type: file + description: recalibration tables file from variantrecalibrator. + pattern: "*.recal" + - recal_indel_index: + type: file + description: Index file for the recalibration tables. + pattern: "*.idx" + - recal_indel_tranches: + type: file + description: Tranches file for the recalibration tables. 
+ pattern: "*.tranches" + - vqsr_indel_vcf: + type: file + description: joint vcf file recalibrated using applyvqsr. + pattern: "*.vcf.gz" + - vqsr_indel_index: + type: file + description: Index file for recalibrated joint vcf. + pattern: "*.vcf.gz.tbi" + - merged_recal_vcf: + type: file + description: vcf file containing merged, recalibrated vcf. + pattern: "*.vcf.gz" + - merged_recal_tbi: + type: file + description: Index file for the merged, recalibrated vcf. + pattern: "*.vcf.gz.tbi" +authors: + - '@GCJMackenzie'