diff --git a/.github/workflows/local_modules.yml b/.github/workflows/local_modules.yml
deleted file mode 100644
index 2eee654198..0000000000
--- a/.github/workflows/local_modules.yml
+++ /dev/null
@@ -1,99 +0,0 @@
-name: Local Modules pytest-workflow
-on: [push, pull_request]
-jobs:
- changes:
- name: Check for changes
- runs-on: ubuntu-latest
- outputs:
- # Expose matched filters as job 'modules' output variable
- modules: ${{ steps.filter.outputs.changes }}
- steps:
- - uses: actions/checkout@v2
- - uses: dorny/paths-filter@v2
- id: filter
- with:
- filters: "tests/config/pytest_software.yml"
-
- test:
- runs-on: ubuntu-latest
- name: ${{ matrix.tags }} ${{ matrix.profile }} ${{ matrix.nxf_version }}
- needs: changes
- if: needs.changes.outputs.modules != '[]'
- strategy:
- fail-fast: false
- matrix:
- nxf_version: ["21.04.0"]
- tags: ["${{ fromJson(needs.changes.outputs.modules) }}"]
- profile: ["docker", "singularity"] # 'conda'
- env:
- NXF_ANSI_LOG: false
- steps:
- - uses: actions/checkout@v2
-
- - name: Set up Python
- uses: actions/setup-python@v2
- with:
- python-version: "3.x"
-
- - uses: actions/cache@v2
- with:
- path: ~/.cache/pip
- key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
- restore-keys: |
- ${{ runner.os }}-pip-
-
- - name: Set up Python
- uses: actions/setup-python@v2
- with:
- python-version: "3.x"
-
- - name: Install Python dependencies
- run: python -m pip install --upgrade pip pytest-workflow
-
- - uses: actions/cache@v2
- with:
- path: /usr/local/bin/nextflow
- key: ${{ runner.os }}-nextflow-${{ matrix.nxf_version }}
- restore-keys: |
- ${{ runner.os }}-nextflow-
-
- - name: Install Nextflow
- env:
- NXF_VER: ${{ matrix.nxf_version }}
- CAPSULE_LOG: none
- run: |
- wget -qO- get.nextflow.io | bash
- sudo mv nextflow /usr/local/bin/
-
- - name: Set up Singularity
- if: matrix.profile == 'singularity'
- uses: eWaterCycle/setup-singularity@v5
- with:
- singularity-version: 3.7.1
-
- - name: Setup miniconda
- if: matrix.profile == 'conda'
- uses: conda-incubator/setup-miniconda@v2
- with:
- auto-update-conda: true
- channels: conda-forge,bioconda,defaults
- python-version: ${{ matrix.python-version }}
-
- - name: Conda clean
- if: matrix.profile == 'conda'
- run: conda clean -a
-
- # Test the module
- - name: Run pytest-workflow
- # only use one thread for pytest-workflow to avoid race condition on conda cache.
- run: TMPDIR=~ PROFILE=${{ matrix.profile }} pytest --tag ${{ matrix.tags }} --symlink --kwdof
-
- - name: Upload logs on failure
- if: failure()
- uses: actions/upload-artifact@v2
- with:
- name: logs-${{ matrix.tags }}-${{ matrix.profile }}-${{ matrix.nxf_version }}
- path: |
- /home/runner/pytest_workflow_*/*/.nextflow.log
- /home/runner/pytest_workflow_*/*/log.out
- /home/runner/pytest_workflow_*/*/log.err
diff --git a/conf/test.config b/conf/test.config
index 0a4e257c09..6c5df2785d 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -177,7 +177,7 @@ profiles {
}
umi {
params.input = "${projectDir}/tests/csv/3.0/fastq_umi.csv"
- params.umi_read_structure = '7M1S+T'
+ params.umi_read_structure = '+T 7M1S+T'
}
use_gatk_spark {
params.use_gatk_spark = 'baserecalibrator,markduplicates'
diff --git a/conf/test_full_somatic.config b/conf/test_full_somatic.config
index 956b540b20..896825c400 100644
--- a/conf/test_full_somatic.config
+++ b/conf/test_full_somatic.config
@@ -19,7 +19,6 @@ params {
// Other params
tools = 'strelka,mutect2,freebayes,ascat,manta,cnvkit,tiddit,controlfreec,vep'
-
split_fastq = 20000000
intervals = 's3://nf-core-awsmegatests/sarek/input/S07604624_Padded_Agilent_SureSelectXT_allexons_V6_UTR.bed'
wes = true
diff --git a/docs/usage.md b/docs/usage.md
index 6fe7ef39d5..470f4c213c 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -26,7 +26,7 @@ Note that the pipeline will create the following files in your working directory
```console
work # Directory containing the nextflow working files
results # Finished results (configurable, see below)
-.nextflow_log # Log file from Nextflow
+.nextflow.log # Log file from Nextflow
# Other nextflow hidden files, eg. history of pipeline runs and old logs.
```
@@ -58,7 +58,7 @@ Multiple CSV files can be specified if the path is enclosed in quotes.
| `sex` | **Sex chromosomes of the patient**; i.e. XX, XY..., only used for Copy-Number Variation analysis in a tumor/pair
_Optional, Default: `NA`_ |
| `status` | **Normal/tumor status of sample**; can be `0` (normal) or `1` (tumor).
_Optional, Default: `0`_ |
| `sample` | **Custom sample ID** for each tumor and normal sample; more than one tumor sample for each subject is possible, i.e. a tumor and a relapse; samples can have multiple lanes for which the _same_ ID must be used to merge them later (see also `lane`). Sample IDs must be unique for unique biological samples
_Required_ |
-| `lane` | Lane ID, used when the `sample` is multiplexed on several lanes. Must be unique for each lane in the same sample (but does not need to be the original lane name), and must contain at least one character
_Required for `--step_mapping`_ |
+| `lane` | Lane ID, used when the `sample` is multiplexed on several lanes. Must be unique for each lane in the same sample (but does not need to be the original lane name), and must contain at least one character
_Required for `--step mapping`_ |
| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension `.fastq.gz` or `.fq.gz`. |
| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension `.fastq.gz` or `.fq.gz`. |
| `bam` | Full path to (u)BAM file |
@@ -672,7 +672,8 @@ This will enable pre-processing of the reads and UMI consensus reads calling, wh
### UMI Read Structure
This parameter is a string, which follows a [convention](https://github.com/fulcrumgenomics/fgbio/wiki/Read-Structures) to describe the structure of the umi.
-If your reads contain a UMI only on one end, the string should only represent one structure (i.e. "2M11S+T"); should your reads contain a UMI on both ends, the string will contain two structures separated by a blank space (i.e. "2M11S+T 2M11S+T").
+
+As an example: if your reads contain a UMI only on the forward read, the string should only represent one structure (i.e. "2M11S+T"); should your reads contain a UMI on both reads, the string will contain two structures separated by a blank space (i.e. "2M11S+T 2M11S+T"); should your reads contain a UMI only on the reverse read, your structure must represent the template only for the forward read and template plus UMI for the reverse read (i.e. "+T 12M11S+T"). Please do refer to the FGBIO documentation for more details, as providing the correct structure is essential and specific to the UMI kit used.
### Limitations and future updates
diff --git a/nextflow.config b/nextflow.config
index c91a456e4c..3bb4aaf870 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -10,43 +10,43 @@ params {
// Workflow flags:
// Mandatory arguments
- input = null // No default input
- step = 'mapping' // Starts with mapping
+ input = null // No default input
+ step = 'mapping' // Starts with mapping
// Genome and references options
- genome = 'GATK.GRCh38'
- igenomes_base = 's3://ngi-igenomes/igenomes/'
+ genome = 'GATK.GRCh38'
+ igenomes_base = 's3://ngi-igenomes/igenomes/'
igenomes_ignore = false
- save_reference = false // Built references not saved
+ save_reference = false // Built references not saved
// Main options
- no_intervals = false // Intervals will be built from the fasta file
- nucleotides_per_second = 1000 // Default interval size
- tools = null // No default Variant_Calling or Annotation tools
- skip_tools = null // All tools (markduplicates + baserecalibrator + QC) are used by default
+ no_intervals = false // Intervals will be built from the fasta file
+ nucleotides_per_second = 1000 // Default interval size
+ tools = null // No default Variant_Calling or Annotation tools
+ skip_tools = null // All tools (markduplicates + baserecalibrator + QC) are used by default
+ split_fastq = 0 // FASTQ files will not be split by default by FASTP
- // Modify fastqs (trim/split)
- trim_fastq = false // No trimming
- clip_r1 = 0
- clip_r2 = 0
+ // Modify fastqs (trim/split) with FASTP
+ trim_fastq = false // No trimming
+ clip_r1 = 0
+ clip_r2 = 0
three_prime_clip_r1 = 0
three_prime_clip_r2 = 0
- trim_nextseq = 0
- save_trimmed = false
- split_fastq = 0 // FASTQ files will not be split by default
- save_split_fastqs = false
+ trim_nextseq = 0
+ save_trimmed = false
+ save_split_fastqs = false
// UMI tagged reads
- umi_read_structure = null // no UMI
- group_by_umi_strategy = 'Adjacency' // default strategy when UMI
+ umi_read_structure = null // no UMI
+ group_by_umi_strategy = 'Adjacency' // default strategy when running with UMI for GROUPREADSBYUMI
// Preprocessing
- aligner = 'bwa-mem' // Default is bwa-mem, bwa-mem2 and dragmap can be used too
- use_gatk_spark = null // GATK Spark implementation of their tools in local mode not used by default
- save_bam_mapped = false // Mapped BAMs not saved
- save_output_as_bam = false //Output files from preprocessing are saved as bam and not as cram files
- seq_center = null // No sequencing center to be written in read group CN field by aligner
- seq_platform = 'ILLUMINA' // Default platform written in read group PL field by aligner
+ aligner = 'bwa-mem' // Default is bwa-mem, bwa-mem2 and dragmap can be used too
+ use_gatk_spark = null // GATK Spark implementation of their tools in local mode not used by default
+ save_bam_mapped = false // Mapped BAMs not saved
+ save_output_as_bam = false // Output files from preprocessing are saved as bam and not as cram files
+ seq_center = null // No sequencing center to be written in read group CN field by aligner
+ seq_platform = 'ILLUMINA' // Default platform written in read group PL field by aligner
// Variant Calling
only_paired_variant_calling = false //if true, skips germline variant calling for normal-paired samples
@@ -62,31 +62,31 @@ params {
cf_mincov = 0 // ControlFreec default values
cf_minqual = 0 // ControlFreec default values
cf_window = null // by default we are not using this in Control-FREEC
- ignore_soft_clipped_bases = false // no --dont-use-soft-clipped-bases for GATK Mutect2
- wes = false // Set to true, if data is exome/targeted sequencing data. Used to use correct models in various variant callers
+ ignore_soft_clipped_bases = false // no --dont-use-soft-clipped-bases for GATK Mutect2
+ wes = false // Set to true, if data is exome/targeted sequencing data. Used to use correct models in various variant callers
// Annotation
- vep_out_format = 'vcf'
- vep_dbnsfp = null // dbnsfp plugin disabled within VEP
- dbnsfp = null // No dbnsfp processed file
- dbnsfp_tbi = null // No dbnsfp processed file index
- dbnsfp_consequence = null // No default consequence for dbnsfp plugin
- dbnsfp_fields = "rs_dbSNP,HGVSc_VEP,HGVSp_VEP,1000Gp3_EAS_AF,1000Gp3_AMR_AF,LRT_score,GERP++_RS,gnomAD_exomes_AF" // Default fields for dbnsfp plugin
- vep_loftee = null // loftee plugin disabled within VEP
- vep_spliceai = null // spliceai plugin disabled within VEP
- spliceai_snv = null // No spliceai_snv file
- spliceai_snv_tbi = null // No spliceai_snv file index
- spliceai_indel = null // No spliceai_indel file
- spliceai_indel_tbi = null // No spliceai_indel file index
- vep_spliceregion = null // spliceregion plugin disabled within VEP
- snpeff_cache = null // No directory for snpEff cache
- vep_cache = null // No directory for VEP cache
- vep_include_fasta = false // Don't use fasta file for annotation with VEP
+ vep_out_format = 'vcf'
+ vep_dbnsfp = null // dbnsfp plugin disabled within VEP
+ dbnsfp = null // No dbnsfp processed file
+ dbnsfp_tbi = null // No dbnsfp processed file index
+ dbnsfp_consequence = null // No default consequence for dbnsfp plugin
+ dbnsfp_fields = "rs_dbSNP,HGVSc_VEP,HGVSp_VEP,1000Gp3_EAS_AF,1000Gp3_AMR_AF,LRT_score,GERP++_RS,gnomAD_exomes_AF" // Default fields for dbnsfp plugin
+ vep_loftee = null // loftee plugin disabled within VEP
+ vep_spliceai = null // spliceai plugin disabled within VEP
+ spliceai_snv = null // No spliceai_snv file
+ spliceai_snv_tbi = null // No spliceai_snv file index
+ spliceai_indel = null // No spliceai_indel file
+ spliceai_indel_tbi = null // No spliceai_indel file index
+ vep_spliceregion = null // spliceregion plugin disabled within VEP
+ snpeff_cache = null // No directory for snpEff cache
+ vep_cache = null // No directory for VEP cache
+ vep_include_fasta = false // Don't use fasta file for annotation with VEP
// MultiQC options
- multiqc_config = null
- multiqc_title = null
- max_multiqc_email_size = '25.MB'
+ multiqc_config = null
+ multiqc_title = null
+ max_multiqc_email_size = '25.MB'
// Boilerplate options
outdir = 'results'
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 8aae3bcb19..6812c92df2 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -166,7 +166,7 @@
"fa_icon": "fas fa-tape",
"description": "Specify UMI read structure",
"hidden": true,
- "help_text": "One structure if UMI is present on one end (i.e. '2M11S+T'), or two structures separated by a blank space if UMIs a present on both ends (i.e. '2M11S+T 2M11S+T'); please note, this does not handle duplex-UMIs.\n\nIt is recommended to skip duplicate marking and base quality score recalibration. See `--skip_tools`."
+ "help_text": "One structure if UMI is present on one end (i.e. '+T 2M11S+T'), or two structures separated by a blank space if UMIs are present on both ends (i.e. '2M11S+T 2M11S+T'); please note, this does not handle duplex-UMIs.\n\nFor more info on UMI usage in the pipeline, also check docs [here](./docs/usage.md/#how-to-handle-umis)."
},
"group_by_umi_strategy": {
"type": "string",
diff --git a/subworkflows/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf
index 18df95f69e..57fdf708a1 100644
--- a/subworkflows/local/germline_variant_calling.nf
+++ b/subworkflows/local/germline_variant_calling.nf
@@ -9,7 +9,7 @@ include { RUN_HAPLOTYPECALLER } from '../nf-core/variantcalling/haplotypecaller/
include { RUN_MANTA_GERMLINE } from '../nf-core/variantcalling/manta/germline/main.nf'
include { RUN_MPILEUP } from '../nf-core/variantcalling/mpileup/main'
include { RUN_STRELKA_SINGLE } from '../nf-core/variantcalling/strelka/single/main.nf'
-include { RUN_TIDDIT } from '../nf-core/variantcalling/tiddit/main.nf'
+include { RUN_TIDDIT } from '../nf-core/variantcalling/tiddit/single/main.nf'
workflow GERMLINE_VARIANT_CALLING {
take:
@@ -49,7 +49,15 @@ workflow GERMLINE_VARIANT_CALLING {
//If no interval file provided (0) then add empty list
intervals_new = num_intervals == 0 ? [] : intervals
- [[patient:meta.patient, sample:meta.sample, sex:meta.sex, status:meta.status, id:meta.sample, data_type:meta.data_type, num_intervals:num_intervals],
+ [[
+ data_type: meta.data_type,
+ id: meta.sample,
+ num_intervals: num_intervals,
+ patient: meta.patient,
+ sample: meta.sample,
+ sex: meta.sex,
+ status: meta.status,
+ ],
cram, crai, intervals_new]
}
@@ -61,7 +69,15 @@ workflow GERMLINE_VARIANT_CALLING {
bed_new = num_intervals == 0 ? [] : bed_tbi[0]
tbi_new = num_intervals == 0 ? [] : bed_tbi[1]
- [[patient:meta.patient, sample:meta.sample, sex:meta.sex, status:meta.status, id:meta.sample, data_type:meta.data_type, num_intervals:num_intervals],
+ [[
+ data_type: meta.data_type,
+ id: meta.sample,
+ num_intervals: num_intervals,
+ patient: meta.patient,
+ sample: meta.sample,
+ sex: meta.sex,
+ status: meta.status,
+ ],
cram, crai, bed_new, tbi_new]
}
@@ -71,8 +87,11 @@ workflow GERMLINE_VARIANT_CALLING {
[meta, cram, intervals]
}
- RUN_MPILEUP(cram_intervals_no_index,
- fasta)
+ RUN_MPILEUP(
+ cram_intervals_no_index,
+ fasta
+ )
+
mpileup_germline = RUN_MPILEUP.out.mpileup
ch_versions = ch_versions.mix(RUN_MPILEUP.out.versions)
}
@@ -85,17 +104,25 @@ workflow GERMLINE_VARIANT_CALLING {
[meta, [], cram]
}
- RUN_CNVKIT(cram_recalibrated_cnvkit_germline,
- fasta,
- fasta_fai,
- intervals_bed_combined,
- [])
+ RUN_CNVKIT(
+ cram_recalibrated_cnvkit_germline,
+ fasta,
+ fasta_fai,
+ intervals_bed_combined,
+ []
+ )
+
ch_versions = ch_versions.mix(RUN_CNVKIT.out.versions)
}
// DEEPVARIANT
if(tools.split(',').contains('deepvariant')){
- RUN_DEEPVARIANT(cram_recalibrated_intervals, dict, fasta, fasta_fai)
+ RUN_DEEPVARIANT(
+ cram_recalibrated_intervals,
+ dict,
+ fasta,
+ fasta_fai
+ )
deepvariant_vcf = Channel.empty().mix(RUN_DEEPVARIANT.out.deepvariant_vcf,RUN_DEEPVARIANT.out.deepvariant_gvcf)
ch_versions = ch_versions.mix(RUN_DEEPVARIANT.out.versions)
@@ -108,7 +135,13 @@ workflow GERMLINE_VARIANT_CALLING {
.map{ meta, cram, crai, intervals ->
[meta, cram, crai, [], [], intervals]
}
- RUN_FREEBAYES(cram_recalibrated_intervals_freebayes, dict, fasta, fasta_fai)
+
+ RUN_FREEBAYES(
+ cram_recalibrated_intervals_freebayes,
+ dict,
+ fasta,
+ fasta_fai
+ )
freebayes_vcf = RUN_FREEBAYES.out.freebayes_vcf
ch_versions = ch_versions.mix(RUN_FREEBAYES.out.versions)
@@ -120,15 +153,17 @@ workflow GERMLINE_VARIANT_CALLING {
.map{ meta, cram, crai, intervals ->
[meta, cram, crai, intervals, []]
}
- RUN_HAPLOTYPECALLER(cram_recalibrated_intervals_haplotypecaller,
- fasta,
- fasta_fai,
- dict,
- dbsnp,
- dbsnp_tbi,
- intervals_bed_combined_haplotypec,
- known_sites,
- known_sites_tbi)
+ RUN_HAPLOTYPECALLER(
+ cram_recalibrated_intervals_haplotypecaller,
+ fasta,
+ fasta_fai,
+ dict,
+ dbsnp,
+ dbsnp_tbi,
+ intervals_bed_combined_haplotypec,
+ known_sites,
+ known_sites_tbi
+ )
haplotypecaller_vcf = RUN_HAPLOTYPECALLER.out.filtered_vcf
ch_versions = ch_versions.mix(RUN_HAPLOTYPECALLER.out.versions)
@@ -136,10 +171,12 @@ workflow GERMLINE_VARIANT_CALLING {
// MANTA
if (tools.split(',').contains('manta')){
- RUN_MANTA_GERMLINE (cram_recalibrated_intervals_gz_tbi,
- dict,
- fasta,
- fasta_fai)
+ RUN_MANTA_GERMLINE (
+ cram_recalibrated_intervals_gz_tbi,
+ dict,
+ fasta,
+ fasta_fai
+ )
manta_vcf = RUN_MANTA_GERMLINE.out.manta_vcf
ch_versions = ch_versions.mix(RUN_MANTA_GERMLINE.out.versions)
@@ -147,10 +184,12 @@ workflow GERMLINE_VARIANT_CALLING {
// STRELKA
if (tools.split(',').contains('strelka')){
- RUN_STRELKA_SINGLE(cram_recalibrated_intervals_gz_tbi,
- dict,
- fasta,
- fasta_fai)
+ RUN_STRELKA_SINGLE(
+ cram_recalibrated_intervals_gz_tbi,
+ dict,
+ fasta,
+ fasta_fai
+ )
strelka_vcf = RUN_STRELKA_SINGLE.out.strelka_vcf
ch_versions = ch_versions.mix(RUN_STRELKA_SINGLE.out.versions)
@@ -158,9 +197,11 @@ workflow GERMLINE_VARIANT_CALLING {
//TIDDIT
if (tools.split(',').contains('tiddit')){
- RUN_TIDDIT(cram_recalibrated,
- fasta,
- bwa)
+ RUN_TIDDIT(
+ cram_recalibrated,
+ fasta,
+ bwa
+ )
tiddit_vcf = RUN_TIDDIT.out.tiddit_vcf
ch_versions = ch_versions.mix(RUN_TIDDIT.out.versions)
diff --git a/subworkflows/local/pair_variant_calling.nf b/subworkflows/local/pair_variant_calling.nf
index 3656cc467e..f0a3707429 100644
--- a/subworkflows/local/pair_variant_calling.nf
+++ b/subworkflows/local/pair_variant_calling.nf
@@ -11,7 +11,7 @@ include { RUN_CNVKIT } from '../nf-core/variantca
include { RUN_MPILEUP as RUN_MPILEUP_NORMAL } from '../nf-core/variantcalling/mpileup/main'
include { RUN_MPILEUP as RUN_MPILEUP_TUMOR } from '../nf-core/variantcalling/mpileup/main'
include { RUN_ASCAT_SOMATIC } from '../nf-core/variantcalling/ascat/main'
-include { RUN_TIDDIT_SOMATIC } from '../nf-core/variantcalling/tiddit/tiddit_somatic/main'
+include { RUN_TIDDIT_SOMATIC } from '../nf-core/variantcalling/tiddit/somatic/main'
workflow PAIR_VARIANT_CALLING {
take:
@@ -56,7 +56,14 @@ workflow PAIR_VARIANT_CALLING {
//If no interval file provided (0) then add empty list
intervals_new = num_intervals == 0 ? [] : intervals
- [[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, sex:meta.sex, id: meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:num_intervals],
+ [[
+ id: meta.tumor_id + "_vs_" + meta.normal_id,
+ normal_id: meta.normal_id,
+ num_intervals: num_intervals,
+ patient: meta.patient,
+ sex: meta.sex,
+ tumor_id: meta.tumor_id,
+ ],
normal_cram, normal_crai, tumor_cram, tumor_crai, intervals_new]
}
@@ -68,20 +75,29 @@ workflow PAIR_VARIANT_CALLING {
bed_new = num_intervals == 0 ? [] : bed_tbi[0]
tbi_new = num_intervals == 0 ? [] : bed_tbi[1]
- [[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, sex:meta.sex, id: meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:num_intervals],
+ [[
+ id: meta.tumor_id + "_vs_" + meta.normal_id,
+ normal_id: meta.normal_id,
+ num_intervals: num_intervals,
+ patient: meta.patient,
+ sex: meta.sex,
+ tumor_id: meta.tumor_id,
+ ],
normal_cram, normal_crai, tumor_cram, tumor_crai, bed_new, tbi_new]
}
if (tools.split(',').contains('ascat')){
- RUN_ASCAT_SOMATIC( cram_pair,
- allele_files,
- loci_files,
- intervals_bed_combined,
- fasta,
- gc_file,
- rt_file)
+ RUN_ASCAT_SOMATIC(
+ cram_pair,
+ allele_files,
+ loci_files,
+ intervals_bed_combined,
+ fasta,
+ gc_file,
+ rt_file
+ )
ch_versions = ch_versions.mix(RUN_ASCAT_SOMATIC.out.versions)
@@ -97,8 +113,16 @@ workflow PAIR_VARIANT_CALLING {
.map {meta, normal_cram, normal_crai, tumor_cram, tumor_crai, intervals ->
[meta, tumor_cram, intervals]
}
- RUN_MPILEUP_NORMAL(cram_normal_intervals_no_index, fasta)
- RUN_MPILEUP_TUMOR(cram_tumor_intervals_no_index, fasta)
+
+ RUN_MPILEUP_NORMAL(
+ cram_normal_intervals_no_index,
+ fasta
+ )
+
+ RUN_MPILEUP_TUMOR(
+ cram_tumor_intervals_no_index,
+ fasta
+ )
mpileup_normal = RUN_MPILEUP_NORMAL.out.mpileup
mpileup_tumor = RUN_MPILEUP_TUMOR.out.mpileup
@@ -108,14 +132,16 @@ workflow PAIR_VARIANT_CALLING {
[normal[0], normal[1], tumor[1], [], [], [], []]
}
- RUN_CONTROLFREEC_SOMATIC(controlfreec_input,
- fasta,
- fasta_fai,
- dbsnp,
- dbsnp_tbi,
- chr_files,
- mappability,
- intervals_bed_combined)
+ RUN_CONTROLFREEC_SOMATIC(
+ controlfreec_input,
+ fasta,
+ fasta_fai,
+ dbsnp,
+ dbsnp_tbi,
+ chr_files,
+ mappability,
+ intervals_bed_combined
+ )
ch_versions = ch_versions.mix(RUN_MPILEUP_NORMAL.out.versions)
ch_versions = ch_versions.mix(RUN_MPILEUP_TUMOR.out.versions)
@@ -128,25 +154,37 @@ workflow PAIR_VARIANT_CALLING {
[meta, tumor_cram, normal_cram]
}
- RUN_CNVKIT( cram_pair_cnvkit_somatic,
- fasta,
- fasta_fai,
- intervals_bed_combined,
- [])
+ RUN_CNVKIT(
+ cram_pair_cnvkit_somatic,
+ fasta,
+ fasta_fai,
+ intervals_bed_combined,
+ []
+ )
+
+ ch_versions = ch_versions.mix(RUN_CNVKIT.out.versions)
}
if (tools.split(',').contains('freebayes')){
- RUN_FREEBAYES_SOMATIC(cram_pair_intervals, dict, fasta, fasta_fai)
+
+ RUN_FREEBAYES_SOMATIC(
+ cram_pair_intervals,
+ dict,
+ fasta,
+ fasta_fai
+ )
freebayes_vcf = RUN_FREEBAYES_SOMATIC.out.freebayes_vcf
ch_versions = ch_versions.mix(RUN_FREEBAYES_SOMATIC.out.versions)
}
if (tools.split(',').contains('manta')) {
- RUN_MANTA_SOMATIC( cram_pair_intervals_gz_tbi,
- dict,
- fasta,
- fasta_fai)
+ RUN_MANTA_SOMATIC(
+ cram_pair_intervals_gz_tbi,
+ dict,
+ fasta,
+ fasta_fai
+ )
manta_vcf = RUN_MANTA_SOMATIC.out.manta_vcf
manta_candidate_small_indels_vcf = RUN_MANTA_SOMATIC.out.manta_candidate_small_indels_vcf
@@ -166,7 +204,14 @@ workflow PAIR_VARIANT_CALLING {
bed_new = num_intervals == 0 ? [] : bed_tbi[0]
tbi_new = num_intervals == 0 ? [] : bed_tbi[1]
- [[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, sex:meta.sex, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:num_intervals],
+ [[
+ id: meta.tumor_id + "_vs_" + meta.normal_id,
+ normal_id: meta.normal_id,
+ num_intervals: num_intervals,
+ patient: meta.patient,
+ sex: meta.sex,
+ tumor_id: meta.tumor_id,
+ ],
normal_cram, normal_crai, tumor_cram, tumor_crai, vcf, vcf_tbi, bed_new, tbi_new]
}
@@ -177,10 +222,12 @@ workflow PAIR_VARIANT_CALLING {
}
}
- RUN_STRELKA_SOMATIC(cram_pair_strelka,
- dict,
- fasta,
- fasta_fai)
+ RUN_STRELKA_SOMATIC(
+ cram_pair_strelka,
+ dict,
+ fasta,
+ fasta_fai
+ )
strelka_vcf = Channel.empty().mix(RUN_STRELKA_SOMATIC.out.strelka_vcf)
ch_versions = ch_versions.mix(RUN_STRELKA_SOMATIC.out.versions)
@@ -207,7 +254,8 @@ workflow PAIR_VARIANT_CALLING {
germline_resource,
germline_resource_tbi,
panel_of_normals,
- panel_of_normals_tbi)
+ panel_of_normals_tbi
+ )
mutect2_vcf = GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING.out.filtered_vcf
ch_versions = ch_versions.mix(GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING.out.versions)
diff --git a/subworkflows/local/tumor_variant_calling.nf b/subworkflows/local/tumor_variant_calling.nf
index f16220fd63..365fb0f6b6 100644
--- a/subworkflows/local/tumor_variant_calling.nf
+++ b/subworkflows/local/tumor_variant_calling.nf
@@ -10,7 +10,7 @@ include { RUN_STRELKA_SINGLE } from '../nf-core/variantcall
include { RUN_CONTROLFREEC_TUMORONLY } from '../nf-core/variantcalling/controlfreec/tumoronly/main.nf'
include { RUN_CNVKIT } from '../nf-core/variantcalling/cnvkit/main.nf'
include { RUN_MPILEUP } from '../nf-core/variantcalling/mpileup/main'
-include { RUN_TIDDIT } from '../nf-core/variantcalling/tiddit/main.nf'
+include { RUN_TIDDIT } from '../nf-core/variantcalling/tiddit/single/main.nf'
workflow TUMOR_ONLY_VARIANT_CALLING {
take:
@@ -51,7 +51,15 @@ workflow TUMOR_ONLY_VARIANT_CALLING {
//If no interval file provided (0) then add empty list
intervals_new = num_intervals == 0 ? [] : intervals
- [[patient:meta.patient, sample:meta.sample, sex:meta.sex, status:meta.status, id:meta.sample, data_type:meta.data_type, num_intervals:num_intervals],
+ [[
+ data_type: meta.data_type,
+ id: meta.sample,
+ num_intervals: num_intervals,
+ patient: meta.patient,
+ sample: meta.sample,
+ sex: meta.sex,
+ status: meta.status,
+ ],
cram, crai, intervals_new]
}
@@ -63,7 +71,15 @@ workflow TUMOR_ONLY_VARIANT_CALLING {
bed_new = num_intervals == 0 ? [] : bed_tbi[0]
tbi_new = num_intervals == 0 ? [] : bed_tbi[1]
- [[patient:meta.patient, sample:meta.sample, sex:meta.sex, status:meta.status, id:meta.sample, data_type:meta.data_type, num_intervals:num_intervals],
+ [[
+ data_type: meta.data_type,
+ id: meta.sample,
+ num_intervals: num_intervals,
+ patient: meta.patient,
+ sample: meta.sample,
+ sex: meta.sex,
+ status: meta.status,
+ ],
cram, crai, bed_new, tbi_new]
}
@@ -71,8 +87,11 @@ workflow TUMOR_ONLY_VARIANT_CALLING {
cram_intervals_no_index = cram_recalibrated_intervals.map { meta, cram, crai, intervals ->
[meta, cram, intervals]
}
- RUN_MPILEUP(cram_intervals_no_index,
- fasta)
+ RUN_MPILEUP(
+ cram_intervals_no_index,
+ fasta
+ )
+
ch_versions = ch_versions.mix(RUN_MPILEUP.out.versions)
}
@@ -83,14 +102,16 @@ workflow TUMOR_ONLY_VARIANT_CALLING {
}
RUN_CONTROLFREEC_TUMORONLY(
- controlfreec_input,
- fasta,
- fasta_fai,
- dbsnp,
- dbsnp_tbi,
- chr_files,
- mappability,
- intervals_bed_combined)
+ controlfreec_input,
+ fasta,
+ fasta_fai,
+ dbsnp,
+ dbsnp_tbi,
+ chr_files,
+ mappability,
+ intervals_bed_combined
+ )
+
ch_versions = ch_versions.mix(RUN_CONTROLFREEC_TUMORONLY.out.versions)
}
@@ -100,11 +121,13 @@ workflow TUMOR_ONLY_VARIANT_CALLING {
[meta, cram, []]
}
- RUN_CNVKIT ( cram_recalibrated_cnvkit_tumoronly,
- fasta,
- fasta_fai,
- [],
- cnvkit_reference )
+ RUN_CNVKIT (
+ cram_recalibrated_cnvkit_tumoronly,
+ fasta,
+ fasta_fai,
+ [],
+ cnvkit_reference
+ )
ch_versions = ch_versions.mix(RUN_CNVKIT.out.versions)
}
@@ -116,41 +139,54 @@ workflow TUMOR_ONLY_VARIANT_CALLING {
[meta, cram, crai, [], [], intervals]
}
- RUN_FREEBAYES(cram_recalibrated_intervals_freebayes, dict, fasta, fasta_fai)
+ RUN_FREEBAYES(
+ cram_recalibrated_intervals_freebayes,
+ dict,
+ fasta,
+ fasta_fai
+ )
freebayes_vcf = RUN_FREEBAYES.out.freebayes_vcf
ch_versions = ch_versions.mix(RUN_FREEBAYES.out.versions)
}
if (tools.split(',').contains('mutect2')) {
- GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING(cram_recalibrated_intervals,
- fasta,
- fasta_fai,
- dict,
- germline_resource,
- germline_resource_tbi,
- panel_of_normals,
- panel_of_normals_tbi)
+ GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING(
+ cram_recalibrated_intervals,
+ fasta,
+ fasta_fai,
+ dict,
+ germline_resource,
+ germline_resource_tbi,
+ panel_of_normals,
+ panel_of_normals_tbi
+ )
mutect2_vcf = GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING.out.filtered_vcf
ch_versions = ch_versions.mix(GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING.out.versions)
}
if (tools.split(',').contains('manta')){
- RUN_MANTA_TUMORONLY(cram_recalibrated_intervals_gz_tbi,
- dict,
- fasta,
- fasta_fai)
+
+ RUN_MANTA_TUMORONLY(
+ cram_recalibrated_intervals_gz_tbi,
+ dict,
+ fasta,
+ fasta_fai
+ )
manta_vcf = RUN_MANTA_TUMORONLY.out.manta_vcf
ch_versions = ch_versions.mix(RUN_MANTA_TUMORONLY.out.versions)
}
if (tools.split(',').contains('strelka')) {
- RUN_STRELKA_SINGLE(cram_recalibrated_intervals_gz_tbi,
- dict,
- fasta,
- fasta_fai)
+
+ RUN_STRELKA_SINGLE(
+ cram_recalibrated_intervals_gz_tbi,
+ dict,
+ fasta,
+ fasta_fai
+ )
strelka_vcf = RUN_STRELKA_SINGLE.out.strelka_vcf
ch_versions = ch_versions.mix(RUN_STRELKA_SINGLE.out.versions)
@@ -158,9 +194,12 @@ workflow TUMOR_ONLY_VARIANT_CALLING {
//TIDDIT
if (tools.split(',').contains('tiddit')){
- RUN_TIDDIT(cram_recalibrated,
- fasta,
- bwa)
+
+ RUN_TIDDIT(
+ cram_recalibrated,
+ fasta,
+ bwa
+ )
tiddit_vcf = RUN_TIDDIT.out.tiddit_vcf
ch_versions = ch_versions.mix(RUN_TIDDIT.out.versions)
diff --git a/subworkflows/nf-core/gatk4/markduplicates_spark/main.nf b/subworkflows/nf-core/gatk4/markduplicates_spark/main.nf
index e980fe5b5e..0e5b46c4e4 100644
--- a/subworkflows/nf-core/gatk4/markduplicates_spark/main.nf
+++ b/subworkflows/nf-core/gatk4/markduplicates_spark/main.nf
@@ -51,6 +51,7 @@ workflow MARKDUPLICATES_SPARK {
ch_versions = ch_versions.mix(INDEX_MARKDUPLICATES.out.versions.first())
ch_versions = ch_versions.mix(BAM_TO_CRAM.out.versions.first())
ch_versions = ch_versions.mix(SAMTOOLS_CRAMTOBAM.out.versions)
+
emit:
cram = cram_markduplicates
qc = qc_reports
diff --git a/subworkflows/nf-core/gatk4/prepare_recalibration/main.nf b/subworkflows/nf-core/gatk4/prepare_recalibration/main.nf
index 254855f8d7..8253a29b1d 100644
--- a/subworkflows/nf-core/gatk4/prepare_recalibration/main.nf
+++ b/subworkflows/nf-core/gatk4/prepare_recalibration/main.nf
@@ -26,7 +26,15 @@ workflow PREPARE_RECALIBRATION {
//If no interval file provided (0) then add empty list
intervals_new = num_intervals == 0 ? [] : intervals
- [[patient:meta.patient, sample:meta.sample, sex:meta.sex, status:meta.status, id:meta.sample, data_type:meta.data_type, num_intervals:num_intervals],
+ [[
+ data_type: meta.data_type,
+ id: meta.sample,
+ num_intervals: num_intervals,
+ patient: meta.patient,
+ sample: meta.sample,
+ sex: meta.sex,
+ status: meta.status,
+ ],
cram, crai, intervals_new]
}
@@ -37,7 +45,15 @@ workflow PREPARE_RECALIBRATION {
table_to_merge = BASERECALIBRATOR.out.table
.map{ meta, table ->
- new_meta = [patient:meta.patient, sample:meta.sample, sex:meta.sex, status:meta.status, id:meta.sample, data_type:meta.data_type, num_intervals:meta.num_intervals]
+ new_meta = [
+ data_type: meta.data_type,
+ id: meta.sample,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sample: meta.sample,
+ sex: meta.sex,
+ status: meta.status,
+ ]
[groupKey(new_meta, meta.num_intervals), table]
}.groupTuple()
@@ -54,7 +70,14 @@ workflow PREPARE_RECALIBRATION {
table_bqsr = table_to_merge.single.mix(GATHERBQSRREPORTS.out.table)
.map{ meta, table ->
// remove no longer necessary fields to make sure joining can be done correctly: num_intervals
- [[patient:meta.patient, sample:meta.sample, sex:meta.sex, status:meta.status, id:meta.sample, data_type:meta.data_type],
+ [[
+ data_type: meta.data_type,
+ id: meta.sample,
+ patient: meta.patient,
+ sample: meta.sample,
+ sex: meta.sex,
+ status: meta.status,
+ ],
table]
}
diff --git a/subworkflows/nf-core/gatk4/prepare_recalibration_spark/main.nf b/subworkflows/nf-core/gatk4/prepare_recalibration_spark/main.nf
index be868bd015..ba8a7f3992 100644
--- a/subworkflows/nf-core/gatk4/prepare_recalibration_spark/main.nf
+++ b/subworkflows/nf-core/gatk4/prepare_recalibration_spark/main.nf
@@ -26,7 +26,15 @@ workflow PREPARE_RECALIBRATION_SPARK {
//If no interval file provided (0) then add empty list
intervals_new = num_intervals == 0 ? [] : intervals
- [[patient:meta.patient, sample:meta.sample, sex:meta.sex, status:meta.status, id:meta.sample, data_type:meta.data_type, num_intervals:num_intervals],
+ [[
+ data_type: meta.data_type,
+ id: meta.sample,
+ num_intervals: num_intervals,
+ patient: meta.patient,
+ sample: meta.sample,
+ sex: meta.sex,
+ status: meta.status,
+ ],
cram, crai, intervals_new]
}
@@ -37,7 +45,15 @@ workflow PREPARE_RECALIBRATION_SPARK {
table_to_merge = BASERECALIBRATOR_SPARK.out.table
.map{ meta, table ->
- new_meta = [patient:meta.patient, sample:meta.sample, sex:meta.sex, status:meta.status, id:meta.sample, data_type:meta.data_type, num_intervals:meta.num_intervals]
+ new_meta = [
+ data_type: meta.data_type,
+ id: meta.sample,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sample: meta.sample,
+ sex: meta.sex,
+ status: meta.status,
+ ]
[groupKey(new_meta, meta.num_intervals), table]
}.groupTuple()
@@ -54,7 +70,14 @@ workflow PREPARE_RECALIBRATION_SPARK {
table_bqsr = table_to_merge.single.mix(GATHERBQSRREPORTS.out.table)
.map{ meta, table ->
// remove no longer necessary fields to make sure joining can be done correctly: num_intervals
- [[patient:meta.patient, sample:meta.sample, sex:meta.sex, status:meta.status, id:meta.sample, data_type:meta.data_type],
+ [[
+ data_type: meta.data_type,
+ id: meta.sample,
+ patient: meta.patient,
+ sample: meta.sample,
+ sex: meta.sex,
+ status: meta.status,
+ ],
table]
}
diff --git a/subworkflows/nf-core/gatk4/recalibrate/main.nf b/subworkflows/nf-core/gatk4/recalibrate/main.nf
index 297de5b687..fab4d28ddc 100644
--- a/subworkflows/nf-core/gatk4/recalibrate/main.nf
+++ b/subworkflows/nf-core/gatk4/recalibrate/main.nf
@@ -24,7 +24,15 @@ workflow RECALIBRATE {
//If no interval file provided (0) then add empty list
intervals_new = num_intervals == 0 ? [] : intervals
- [[patient:meta.patient, sample:meta.sample, sex:meta.sex, status:meta.status, id:meta.sample, data_type:meta.data_type, num_intervals:num_intervals],
+ [[
+ data_type: meta.data_type,
+ id: meta.sample,
+ num_intervals: num_intervals,
+ patient: meta.patient,
+ sample: meta.sample,
+ sex: meta.sex,
+ status: meta.status,
+ ],
cram, crai, recal, intervals_new]
}
@@ -36,7 +44,14 @@ workflow RECALIBRATE {
ch_cram_recal_out = MERGE_INDEX_CRAM.out.cram_crai.map{ meta, cram, crai ->
// remove no longer necessary fields to make sure joining can be done correctly: num_intervals
- [[patient:meta.patient, sample:meta.sample, sex:meta.sex, status:meta.status, id:meta.id, data_type:meta.data_type],
+ [[
+ data_type: meta.data_type,
+ id: meta.id,
+ patient: meta.patient,
+ sample: meta.sample,
+ sex: meta.sex,
+ status: meta.status,
+ ],
cram, crai]
}
diff --git a/subworkflows/nf-core/gatk4/recalibrate_spark/main.nf b/subworkflows/nf-core/gatk4/recalibrate_spark/main.nf
index 9afd0d55a4..8d0b3b5e50 100644
--- a/subworkflows/nf-core/gatk4/recalibrate_spark/main.nf
+++ b/subworkflows/nf-core/gatk4/recalibrate_spark/main.nf
@@ -24,7 +24,15 @@ workflow RECALIBRATE_SPARK {
//If no interval file provided (0) then add empty list
intervals_new = num_intervals == 0 ? [] : intervals
- [[patient:meta.patient, sample:meta.sample, sex:meta.sex, status:meta.status, id:meta.sample, data_type:meta.data_type, num_intervals:num_intervals],
+ [[
+ id: meta.sample,
+ data_type: meta.data_type,
+ num_intervals: num_intervals,
+ patient: meta.patient,
+ sample: meta.sample,
+ sex: meta.sex,
+ status: meta.status,
+ ],
cram, crai, recal, intervals_new]
}
@@ -36,7 +44,14 @@ workflow RECALIBRATE_SPARK {
ch_cram_recal_out = MERGE_INDEX_CRAM.out.cram_crai.map{ meta, cram, crai ->
// remove no longer necessary fields to make sure joining can be done correctly: num_intervals
- [[patient:meta.patient, sample:meta.sample, sex:meta.sex, status:meta.status, id:meta.id, data_type:meta.data_type],
+ [[
+ id: meta.id,
+ data_type: meta.data_type,
+ patient: meta.patient,
+ sample: meta.sample,
+ sex: meta.sex,
+ status: meta.status,
+ ],
cram, crai]
}
diff --git a/subworkflows/nf-core/gatk4/single_sample_germline_variant_calling/main.nf b/subworkflows/nf-core/gatk4/single_sample_germline_variant_calling/main.nf
index d76405e4c5..9ea75b1a51 100644
--- a/subworkflows/nf-core/gatk4/single_sample_germline_variant_calling/main.nf
+++ b/subworkflows/nf-core/gatk4/single_sample_germline_variant_calling/main.nf
@@ -22,12 +22,14 @@ workflow GATK_SINGLE_SAMPLE_GERMLINE_VARIANT_CALLING{
[meta, vcf, tbi, [], new_intervals]
}
- CNNSCOREVARIANTS(cnn_in,
- fasta,
- fasta_fai,
- dict,
- [],
- [])
+ CNNSCOREVARIANTS(
+ cnn_in,
+ fasta,
+ fasta_fai,
+ dict,
+ [],
+ []
+ )
cnn_out = CNNSCOREVARIANTS.out.vcf.join(CNNSCOREVARIANTS.out.tbi).combine(intervals_bed_combined)
.map{ meta, cnn_vcf,cnn_tbi, intervals ->
@@ -35,16 +37,26 @@ workflow GATK_SINGLE_SAMPLE_GERMLINE_VARIANT_CALLING{
[meta, cnn_vcf, cnn_tbi, new_intervals]
}
- FILTERVARIANTTRANCHES(cnn_out,
- known_sites,
- known_sites_tbi,
- fasta,
- fasta_fai,
- dict)
+ FILTERVARIANTTRANCHES(
+ cnn_out,
+ known_sites,
+ known_sites_tbi,
+ fasta,
+ fasta_fai,
+ dict
+ )
// Figure out if using intervals or no_intervals
filtered_vcf = FILTERVARIANTTRANCHES.out.vcf.map{ meta, vcf ->
- [[patient:meta.patient, sample:meta.sample, status:meta.status, sex:meta.sex, id:meta.sample, num_intervals:meta.num_intervals, variantcaller:"haplotypecaller"], vcf]
+ [[
+ id: meta.sample,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sample: meta.sample,
+ sex: meta.sex,
+ status: meta.status,
+ variantcaller: "haplotypecaller"
+ ], vcf]
}
ch_versions = ch_versions.mix(CNNSCOREVARIANTS.out.versions)
diff --git a/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf b/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf
index e0ba046ea0..26c3b2f839 100644
--- a/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf
+++ b/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf
@@ -65,7 +65,14 @@ workflow GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING {
mutect2_vcf_branch.intervals
.map{ meta, vcf ->
- new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, sex:meta.sex, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals]
+ new_meta = [
+ id:meta.tumor_id + "_vs_" + meta.normal_id,
+ normal_id:meta.normal_id,
+ num_intervals:meta.num_intervals,
+ patient:meta.patient,
+ sex:meta.sex,
+ tumor_id:meta.tumor_id
+ ]
[groupKey(new_meta, meta.num_intervals), vcf]
}.groupTuple(),
@@ -85,7 +92,14 @@ workflow GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING {
mutect2_stats_branch.intervals
.map{ meta, stats ->
- new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, sex:meta.sex, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals]
+ new_meta = [
+ id: meta.tumor_id + "_vs_" + meta.normal_id,
+ normal_id: meta.normal_id,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sex: meta.sex,
+ tumor_id: meta.tumor_id
+ ]
[groupKey(new_meta, meta.num_intervals), stats]
}.groupTuple())
@@ -101,7 +115,14 @@ workflow GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING {
mutect2_f1r2_branch.intervals
.map{ meta, f1r2 ->
- new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, sex:meta.sex, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals]
+ new_meta = [
+ id: meta.tumor_id + "_vs_" + meta.normal_id,
+ normal_id: meta.normal_id,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sex: meta.sex,
+ tumor_id: meta.tumor_id,
+ ]
[groupKey(new_meta, meta.num_intervals), f1r2]
}.groupTuple(),
@@ -121,7 +142,14 @@ workflow GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING {
GETPILEUPSUMMARIES_TUMOR ( pileup.tumor.map{
meta, cram, crai, intervals ->
- [[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, sex:meta.sex, id:meta.tumor_id, num_intervals:meta.num_intervals],
+ [[
+ id: meta.tumor_id,
+ normal_id: meta.normal_id,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sex: meta.sex,
+ tumor_id: meta.tumor_id,
+ ],
cram, crai, intervals]
},
fasta, fai, dict, germline_resource_pileup, germline_resource_pileup_tbi )
@@ -129,7 +157,14 @@ workflow GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING {
GETPILEUPSUMMARIES_NORMAL ( pileup.normal.map{
meta, cram, crai, intervals ->
- [[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, sex:meta.sex, id:meta.normal_id, num_intervals:meta.num_intervals],
+ [[
+ id: meta.normal_id,
+ normal_id: meta.normal_id,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sex: meta.sex,
+ tumor_id: meta.tumor_id,
+ ],
cram, crai, intervals]
},
fasta, fai, dict, germline_resource_pileup, germline_resource_pileup_tbi )
@@ -149,7 +184,14 @@ workflow GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING {
GETPILEUPSUMMARIES_NORMAL.out.table
.map{ meta, table ->
- new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, sex:meta.sex, id:meta.normal_id, num_intervals:meta.num_intervals]
+ new_meta = [
+ id: meta.normal_id,
+ normal_id: meta.normal_id,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sex: meta.sex,
+ tumor_id: meta.tumor_id,
+ ]
[groupKey(new_meta, meta.num_intervals), table]
}.groupTuple(),
@@ -159,14 +201,28 @@ workflow GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING {
GATHERPILEUPSUMMARIES_NORMAL.out.table,
pileup_table_normal.no_intervals).map{ meta, table ->
- new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, sex:meta.sex, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals]
+ new_meta = [
+ id: meta.tumor_id + "_vs_" + meta.normal_id,
+ normal_id: meta.normal_id,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sex: meta.sex,
+ tumor_id: meta.tumor_id,
+ ]
[new_meta, table]
}
GATHERPILEUPSUMMARIES_TUMOR(
GETPILEUPSUMMARIES_TUMOR.out.table
.map{ meta, table ->
- new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, sex:meta.sex, id:meta.tumor_id, num_intervals:meta.num_intervals]
+ new_meta = [
+ id: meta.tumor_id,
+ normal_id: meta.normal_id,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sex: meta.sex,
+ tumor_id: meta.tumor_id,
+ ]
[groupKey(new_meta, meta.num_intervals), table]
}.groupTuple(),
@@ -175,7 +231,14 @@ workflow GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING {
gather_table_tumor = Channel.empty().mix(
GATHERPILEUPSUMMARIES_TUMOR.out.table,
pileup_table_tumor.no_intervals).map{ meta, table ->
- new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, sex:meta.sex, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals]
+ new_meta = [
+ id: meta.tumor_id + "_vs_" + meta.normal_id,
+ normal_id: meta.normal_id,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sex: meta.sex,
+ tumor_id: meta.tumor_id,
+ ]
[new_meta, table]
}
diff --git a/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf b/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf
index d0aab00db5..187bdaf9bc 100644
--- a/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf
+++ b/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf
@@ -64,7 +64,14 @@ workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING {
MERGE_MUTECT2(
mutect2_vcf_branch.intervals
.map{ meta, vcf ->
- new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, sex:meta.sex, id:meta.sample, num_intervals:meta.num_intervals]
+ new_meta = [
+ id: meta.sample,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sample: meta.sample,
+ sex: meta.sex,
+ status: meta.status,
+ ]
[groupKey(new_meta, meta.num_intervals), vcf]
}.groupTuple(),
@@ -82,7 +89,14 @@ workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING {
MERGEMUTECTSTATS(
mutect2_stats_branch.intervals
.map{ meta, stats ->
- new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, sex:meta.sex, id:meta.sample, num_intervals:meta.num_intervals]
+ new_meta = [
+ id: meta.sample,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sample: meta.sample,
+ sex: meta.sex,
+ status: meta.status,
+ ]
[groupKey(new_meta, meta.num_intervals), stats]
}.groupTuple())
@@ -98,7 +112,14 @@ workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING {
Channel.empty().mix(
mutect2_f1r2_branch.intervals
.map{ meta, f1r2 ->
- new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, sex:meta.sex, id:meta.sample, num_intervals:meta.num_intervals]
+ new_meta = [
+ id: meta.sample,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sample: meta.sample,
+ sex: meta.sex,
+ status: meta.status,
+ ]
[groupKey(new_meta, meta.num_intervals), f1r2]
}.groupTuple(),
@@ -107,7 +128,7 @@ workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING {
//
//Generate pileup summary table using getepileupsummaries.
//
- germline_resource_pileup = germline_resource_tbi ? germline_resource : Channel.empty() //Channel.empty().concat(germline_resource) //germline_resource.ifEmpty() ?: Channel.empty()
+ germline_resource_pileup = germline_resource_tbi ? germline_resource : Channel.empty()
germline_resource_pileup_tbi = germline_resource_tbi ?: Channel.empty()
GETPILEUPSUMMARIES ( input , fasta, fai, dict, germline_resource_pileup , germline_resource_pileup_tbi )
@@ -120,7 +141,14 @@ workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING {
GATHERPILEUPSUMMARIES(
GETPILEUPSUMMARIES.out.table
.map{ meta, table ->
- new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, sex:meta.sex, id:meta.sample, num_intervals:meta.num_intervals]
+ new_meta = [
+ id: meta.sample,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sample: meta.sample,
+ sex: meta.sex,
+ status: meta.status,
+ ]
[groupKey(new_meta, meta.num_intervals), table]
}.groupTuple(),
@@ -168,7 +196,15 @@ workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING {
contamination_table = CALCULATECONTAMINATION.out.contamination // channel: [ val(meta), [ contamination ] ]
segmentation_table = CALCULATECONTAMINATION.out.segmentation // channel: [ val(meta), [ segmentation ] ]
- filtered_vcf = FILTERMUTECTCALLS.out.vcf.map{ meta, vcf -> [[patient:meta.patient, sample:meta.sample, status:meta.status, sex:meta.sex, id:meta.sample, num_intervals:meta.num_intervals, variantcaller:"mutect2"]
+ filtered_vcf = FILTERMUTECTCALLS.out.vcf.map{ meta, vcf -> [[
+ id: meta.sample,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sample: meta.sample,
+ sex: meta.sex,
+ status: meta.status,
+ variantcaller: "mutect2"
+ ]
, vcf] } // channel: [ val(meta), [ vcf ] ]
filtered_index = FILTERMUTECTCALLS.out.tbi // channel: [ val(meta), [ tbi ] ]
filtered_stats = FILTERMUTECTCALLS.out.stats // channel: [ val(meta), [ stats ] ]
diff --git a/subworkflows/nf-core/merge_index_cram.nf b/subworkflows/nf-core/merge_index_cram.nf
index 54bc59cc72..e838376ceb 100644
--- a/subworkflows/nf-core/merge_index_cram.nf
+++ b/subworkflows/nf-core/merge_index_cram.nf
@@ -18,7 +18,15 @@ workflow MERGE_INDEX_CRAM {
// Figuring out if there is one or more cram(s) from the same sample
ch_cram_to_merge = ch_cram.map{ meta, cram ->
- [groupKey([patient:meta.patient, sample:meta.sample, sex:meta.sex, status:meta.status, id:meta.sample, data_type:meta.data_type, num_intervals:meta.num_intervals],
+ [groupKey([
+ data_type: meta.data_type,
+ id: meta.sample,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sample: meta.sample,
+ sex: meta.sex,
+ status: meta.status,
+ ],
meta.num_intervals),
cram]
}.groupTuple()
diff --git a/subworkflows/nf-core/run_trimgalore.nf b/subworkflows/nf-core/run_trimgalore.nf
deleted file mode 100644
index 50a2034505..0000000000
--- a/subworkflows/nf-core/run_trimgalore.nf
+++ /dev/null
@@ -1,26 +0,0 @@
-//
-// Read QC and trimming
-//
-
-include { TRIMGALORE } from '../../modules/nf-core/modules/trimgalore/main'
-
-workflow RUN_TRIMGALORE {
- take:
- reads // channel: [ val(meta), [ reads ] ]
-
- main:
- ch_versions = Channel.empty()
-
- TRIMGALORE(reads)
-
- ch_versions = ch_versions.mix(TRIMGALORE.out.versions.first())
-
- emit:
- reads = TRIMGALORE.out.reads // channel: [ val(meta), [ reads ] ]
-
- trim_html = TRIMGALORE.out.html // channel: [ val(meta), [ html ] ]
- trim_zip = TRIMGALORE.out.zip // channel: [ val(meta), [ zip ] ]
- trim_log = TRIMGALORE.out.log // channel: [ val(meta), [ txt ] ]
-
- versions = ch_versions // channel: [ versions.yml ]
-}
diff --git a/subworkflows/nf-core/variantcalling/deepvariant/main.nf b/subworkflows/nf-core/variantcalling/deepvariant/main.nf
index 112ba56fdd..bc3fd6c230 100644
--- a/subworkflows/nf-core/variantcalling/deepvariant/main.nf
+++ b/subworkflows/nf-core/variantcalling/deepvariant/main.nf
@@ -39,7 +39,14 @@ workflow RUN_DEEPVARIANT {
deepvariant_vcf_out.intervals
.map{ meta, vcf ->
- new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, sex:meta.sex, id:meta.sample, num_intervals:meta.num_intervals]
+ new_meta = [
+ id: meta.sample,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sample: meta.sample,
+ sex: meta.sex,
+ status: meta.status,
+ ]
[groupKey(new_meta, meta.num_intervals), vcf]
}.groupTuple(),
@@ -49,7 +56,14 @@ workflow RUN_DEEPVARIANT {
deepvariant_gvcf_out.intervals
.map{ meta, vcf ->
- new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, sex:meta.sex, id:meta.sample, num_intervals:meta.num_intervals]
+ new_meta = [
+ id: meta.sample,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sample: meta.sample,
+ sex: meta.sex,
+ status: meta.status,
+ ]
[groupKey(new_meta, meta.num_intervals), vcf]
}.groupTuple(),
@@ -60,13 +74,29 @@ workflow RUN_DEEPVARIANT {
MERGE_DEEPVARIANT_GVCF.out.vcf,
deepvariant_gvcf_out.no_intervals)
.map{ meta, vcf ->
- [[patient:meta.patient, sample:meta.sample, status:meta.status, sex:meta.sex, id:meta.sample, num_intervals:meta.num_intervals, variantcaller:"deepvariant"], vcf]
+ [[
+ id: meta.sample,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sample: meta.sample,
+ sex: meta.sex,
+ status: meta.status,
+ variantcaller: "deepvariant"
+ ], vcf]
}
deepvariant_vcf = Channel.empty().mix(
MERGE_DEEPVARIANT_VCF.out.vcf,
deepvariant_vcf_out.no_intervals)
.map{ meta, vcf ->
- [[patient:meta.patient, sample:meta.sample, status:meta.status, sex:meta.sex, id:meta.sample, num_intervals:meta.num_intervals, variantcaller:"deepvariant"], vcf]
+ [[
+ id: meta.sample,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sample: meta.sample,
+ sex: meta.sex,
+ status: meta.status,
+ variantcaller: "deepvariant"
+ ], vcf]
}
ch_versions = ch_versions.mix(MERGE_DEEPVARIANT_GVCF.out.versions)
diff --git a/subworkflows/nf-core/variantcalling/freebayes/main.nf b/subworkflows/nf-core/variantcalling/freebayes/main.nf
index d53f855751..8e784af91c 100644
--- a/subworkflows/nf-core/variantcalling/freebayes/main.nf
+++ b/subworkflows/nf-core/variantcalling/freebayes/main.nf
@@ -34,8 +34,22 @@ workflow RUN_FREEBAYES {
bcftools_vcf_out.intervals
.map{ meta, vcf ->
- new_meta = meta.tumor_id ? [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, sex:meta.sex, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals]
- : [patient:meta.patient, sample:meta.sample, status:meta.status, sex:meta.sex, id:meta.sample, num_intervals:meta.num_intervals]
+ new_meta = meta.tumor_id ? [
+ id: meta.tumor_id + "_vs_" + meta.normal_id,
+ normal_id: meta.normal_id,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sex: meta.sex,
+ tumor_id: meta.tumor_id,
+ ]
+ : [
+ id: meta.sample,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sample: meta.sample,
+ sex: meta.sex,
+ status: meta.status,
+ ]
[groupKey(new_meta, meta.num_intervals), vcf]
}.groupTuple(),
dict
@@ -46,7 +60,15 @@ workflow RUN_FREEBAYES {
MERGE_FREEBAYES.out.vcf,
bcftools_vcf_out.no_intervals)
.map{ meta, vcf ->
- [ [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, sex:meta.sex, id:meta.id, num_intervals:meta.num_intervals, variantcaller:"freebayes"],
+ [ [
+ id: meta.id,
+ normal_id: meta.normal_id,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sex: meta.sex,
+ tumor_id: meta.tumor_id,
+ variantcaller: "freebayes"
+ ],
vcf]
}
diff --git a/subworkflows/nf-core/variantcalling/haplotypecaller/main.nf b/subworkflows/nf-core/variantcalling/haplotypecaller/main.nf
index 409523a76f..c41371fbb0 100644
--- a/subworkflows/nf-core/variantcalling/haplotypecaller/main.nf
+++ b/subworkflows/nf-core/variantcalling/haplotypecaller/main.nf
@@ -14,7 +14,7 @@ workflow RUN_HAPLOTYPECALLER {
dbsnp_tbi
intervals_bed_combined // channel: [mandatory] intervals/target regions in one file unzipped, no_intervals.bed if no_intervals
known_sites
- known_sites_tbi // channel: [optional]
+ known_sites_tbi // channel: [optional]
main:
@@ -46,7 +46,14 @@ workflow RUN_HAPLOTYPECALLER {
haplotypecaller_vcf_branch.intervals
.map{ meta, vcf ->
- new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, sex:meta.sex, id:meta.sample, num_intervals:meta.num_intervals]
+ new_meta = [
+ id: meta.sample,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sample: meta.sample,
+ sex: meta.sex,
+ status: meta.status
+ ]
[groupKey(new_meta, new_meta.num_intervals), vcf]
}.groupTuple(),
@@ -87,15 +94,27 @@ workflow RUN_HAPLOTYPECALLER {
// filtered_vcf = JOINT_GERMLINE.out.vcf
// ch_versions = ch_versions.mix(GATK_JOINT_GERMLINE_VARIANT_CALLING.out.versions)
} else {
- SINGLE_SAMPLE(haplotypecaller_vcf.join(haplotypecaller_tbi),
- fasta,
- fasta_fai,
- dict,
- intervals_bed_combined,
- known_sites,
- known_sites_tbi)
-
- filtered_vcf = SINGLE_SAMPLE.out.filtered_vcf.map{ meta, vcf-> [[patient:meta.patient, sample:meta.sample, status:meta.status, sex:meta.sex, id:meta.sample, num_intervals:meta.num_intervals, variantcaller:"haplotypecaller"], vcf]}
+ SINGLE_SAMPLE(
+ haplotypecaller_vcf.join(haplotypecaller_tbi),
+ fasta,
+ fasta_fai,
+ dict,
+ intervals_bed_combined,
+ known_sites,
+ known_sites_tbi
+ )
+
+ filtered_vcf = SINGLE_SAMPLE.out.filtered_vcf.map{ meta, vcf ->
+ [[
+ id: meta.sample,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sample: meta.sample,
+ sex: meta.sex,
+ status: meta.status,
+ variantcaller: "haplotypecaller"
+ ],
+ vcf]}
ch_versions = ch_versions.mix(SINGLE_SAMPLE.out.versions)
}
diff --git a/subworkflows/nf-core/variantcalling/manta/germline/main.nf b/subworkflows/nf-core/variantcalling/manta/germline/main.nf
index 7276e88ed2..81bb0c426b 100644
--- a/subworkflows/nf-core/variantcalling/manta/germline/main.nf
+++ b/subworkflows/nf-core/variantcalling/manta/germline/main.nf
@@ -39,7 +39,14 @@ workflow RUN_MANTA_GERMLINE {
manta_small_indels_vcf.intervals
.map{ meta, vcf ->
- [groupKey([patient:meta.patient, sample:meta.sample, status:meta.status, sex:meta.sex, id:meta.sample, num_intervals:meta.num_intervals],
+ [groupKey([
+ id: meta.sample,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sample: meta.sample,
+ sex: meta.sex,
+ status: meta.status,
+ ],
meta.num_intervals),
vcf]
}.groupTuple(),
@@ -49,7 +56,14 @@ workflow RUN_MANTA_GERMLINE {
manta_sv_vcf.intervals
.map{ meta, vcf ->
- [groupKey([patient:meta.patient, sample:meta.sample, status:meta.status, sex:meta.sex, id:meta.sample, num_intervals:meta.num_intervals],
+ [groupKey([
+ id: meta.sample,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sample: meta.sample,
+ sex: meta.sex,
+ status: meta.status,
+ ],
meta.num_intervals),
vcf]
@@ -60,7 +74,14 @@ workflow RUN_MANTA_GERMLINE {
manta_diploid_sv_vcf.intervals
.map{ meta, vcf ->
- [groupKey([patient:meta.patient, sample:meta.sample, status:meta.status, sex:meta.sex, id:meta.sample, num_intervals:meta.num_intervals],
+ [groupKey([
+ id: meta.sample,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sample: meta.sample,
+ status: meta.status,
+ sex: meta.sex,
+ ],
meta.num_intervals),
vcf]
@@ -73,7 +94,15 @@ workflow RUN_MANTA_GERMLINE {
MERGE_MANTA_DIPLOID.out.vcf,
manta_diploid_sv_vcf.no_intervals)
.map{ meta, vcf ->
- [[patient:meta.patient, sample:meta.sample, status:meta.status, sex:meta.sex, id:meta.sample, num_intervals:meta.num_intervals, variantcaller:"manta"], vcf]
+ [[
+ id: meta.sample,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sample: meta.sample,
+ status: meta.status,
+ sex: meta.sex,
+ variantcaller: "manta"],
+ vcf]
}
ch_versions = ch_versions.mix(MERGE_MANTA_DIPLOID.out.versions)
diff --git a/subworkflows/nf-core/variantcalling/manta/somatic/main.nf b/subworkflows/nf-core/variantcalling/manta/somatic/main.nf
index 01704f4682..a1eb740832 100644
--- a/subworkflows/nf-core/variantcalling/manta/somatic/main.nf
+++ b/subworkflows/nf-core/variantcalling/manta/somatic/main.nf
@@ -47,8 +47,15 @@ workflow RUN_MANTA_SOMATIC {
MERGE_MANTA_SV(
manta_candidate_small_indels_vcf.intervals.map{ meta, vcf ->
- [groupKey([patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, sex:meta.sex, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals],
- meta.num_intervals),
+ [groupKey([
+ id: meta.tumor_id + "_vs_" + meta.normal_id,
+ normal_id: meta.normal_id,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sex: meta.sex,
+ tumor_id: meta.tumor_id
+ ],
+ meta.num_intervals),
vcf]
}.groupTuple(),
@@ -57,8 +64,15 @@ workflow RUN_MANTA_SOMATIC {
MERGE_MANTA_SMALL_INDELS(
manta_candidate_sv_vcf.intervals.map{ meta, vcf ->
- [groupKey([patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, sex:meta.sex, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals],
- meta.num_intervals),
+ [groupKey([
+ id: meta.tumor_id + "_vs_" + meta.normal_id,
+ normal_id: meta.normal_id,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sex: meta.sex,
+ tumor_id: meta.tumor_id
+ ],
+ meta.num_intervals),
vcf]
}.groupTuple(),
@@ -66,10 +80,24 @@ workflow RUN_MANTA_SOMATIC {
MERGE_MANTA_DIPLOID(
manta_diploid_sv_vcf.intervals.map{ meta, vcf ->
- new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, sex:meta.sex, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals]
-
- [groupKey([patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, sex:meta.sex, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals],
- meta.num_intervals),
+ new_meta = [
+ id: meta.tumor_id + "_vs_" + meta.normal_id,
+ normal_id: meta.normal_id,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sex: meta.sex,
+ tumor_id: meta.tumor_id
+ ]
+
+ [groupKey([
+ id: meta.tumor_id + "_vs_" + meta.normal_id,
+ normal_id: meta.normal_id,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sex: meta.sex,
+ tumor_id: meta.tumor_id
+ ],
+ meta.num_intervals),
vcf]
}.groupTuple(),
@@ -78,8 +106,15 @@ workflow RUN_MANTA_SOMATIC {
MERGE_MANTA_SOMATIC(
manta_somatic_sv_vcf.intervals.map{ meta, vcf ->
- [groupKey([patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, sex:meta.sex, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals],
- meta.num_intervals),
+ [groupKey([
+ id: meta.tumor_id + "_vs_" + meta.normal_id,
+ normal_id: meta.normal_id,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sex: meta.sex,
+ tumor_id: meta.tumor_id
+ ],
+ meta.num_intervals),
vcf]
}.groupTuple(),
@@ -92,7 +127,15 @@ workflow RUN_MANTA_SOMATIC {
manta_diploid_sv_vcf.no_intervals,
manta_somatic_sv_vcf.no_intervals
).map{ meta, vcf ->
- [[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, sex:meta.sex, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals, variantcaller:"manta"],
+ [[
+ id: meta.tumor_id + "_vs_" + meta.normal_id,
+ num_intervals: meta.num_intervals,
+ normal_id: meta.normal_id,
+ patient: meta.patient,
+ sex: meta.sex,
+ tumor_id: meta.tumor_id,
+ variantcaller: "manta"
+ ],
vcf]
}
@@ -101,7 +144,13 @@ workflow RUN_MANTA_SOMATIC {
MERGE_MANTA_SMALL_INDELS.out.vcf,
manta_candidate_small_indels_vcf.no_intervals
).map{ meta, vcf ->
- [[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, sex:meta.sex, id:meta.tumor_id + "_vs_" + meta.normal_id],
+ [[
+ id: meta.tumor_id + "_vs_" + meta.normal_id,
+ normal_id: meta.normal_id,
+ patient: meta.patient,
+ sex: meta.sex,
+ tumor_id: meta.tumor_id,
+ ],
vcf]
}
@@ -109,7 +158,13 @@ workflow RUN_MANTA_SOMATIC {
MERGE_MANTA_SMALL_INDELS.out.tbi,
manta_candidate_small_indels_vcf_tbi.no_intervals
).map{ meta, vcf ->
- [[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, sex:meta.sex, id:meta.tumor_id + "_vs_" + meta.normal_id],
+ [[
+ id: meta.tumor_id + "_vs_" + meta.normal_id,
+ normal_id: meta.normal_id,
+ patient: meta.patient,
+ sex: meta.sex,
+ tumor_id: meta.tumor_id
+ ],
vcf]
}
diff --git a/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf b/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf
index c966a8c498..852c272af7 100644
--- a/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf
+++ b/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf
@@ -38,7 +38,14 @@ workflow RUN_MANTA_TUMORONLY {
MERGE_MANTA_SMALL_INDELS(
manta_small_indels_vcf.intervals.map{ meta, vcf ->
- [groupKey([patient:meta.patient, sample:meta.sample, status:meta.status, sex:meta.sex, id:meta.sample, num_intervals:meta.num_intervals],
+ [groupKey([
+ id: meta.sample,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sample: meta.sample,
+ sex: meta.sex,
+ status: meta.status,
+ ],
meta.num_intervals),
vcf]
@@ -48,7 +55,14 @@ workflow RUN_MANTA_TUMORONLY {
MERGE_MANTA_SV(
manta_candidate_sv_vcf.intervals.map{ meta, vcf ->
- [groupKey([patient:meta.patient, sample:meta.sample, status:meta.status, sex:meta.sex, id:meta.sample, num_intervals:meta.num_intervals],
+ [groupKey([
+ id: meta.sample,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sample: meta.sample,
+ sex: meta.sex,
+ status: meta.status
+ ],
meta.num_intervals),
vcf]
@@ -58,7 +72,14 @@ workflow RUN_MANTA_TUMORONLY {
MERGE_MANTA_TUMOR(
manta_tumor_sv_vcf.intervals.map{ meta, vcf ->
- [groupKey( [patient:meta.patient, sample:meta.sample, status:meta.status, sex:meta.sex, id:meta.sample, num_intervals:meta.num_intervals],
+ [groupKey( [
+ id: meta.sample,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sample: meta.sample,
+ sex: meta.sex,
+ status: meta.status,
+ ],
meta.num_intervals),
vcf]
@@ -71,8 +92,16 @@ workflow RUN_MANTA_TUMORONLY {
MERGE_MANTA_TUMOR.out.vcf,
manta_tumor_sv_vcf.no_intervals
).map{ meta, vcf ->
- [[patient:meta.patient, sample:meta.sample, status:meta.status, sex:meta.sex, id:meta.sample, num_intervals:meta.num_intervals, variantcaller:"manta"],
- vcf]
+ [[
+ id: meta.sample,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sample: meta.sample,
+ sex: meta.sex,
+ status: meta.status,
+ variantcaller: "manta"
+ ],
+ vcf]
}
ch_versions = ch_versions.mix(MERGE_MANTA_SV.out.versions)
diff --git a/subworkflows/nf-core/variantcalling/mpileup/main.nf b/subworkflows/nf-core/variantcalling/mpileup/main.nf
index e5b49aed7c..c4868d964e 100644
--- a/subworkflows/nf-core/variantcalling/mpileup/main.nf
+++ b/subworkflows/nf-core/variantcalling/mpileup/main.nf
@@ -19,8 +19,22 @@ workflow RUN_MPILEUP {
//Merge mpileup only when intervals and natural order sort them
CAT_MPILEUP(mpileup.intervals
.map{ meta, pileup ->
- new_meta = meta.tumor_id ? [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, sex:meta.sex, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals] // not annotated, so no variantcaller necessary
- : [patient:meta.patient, sample:meta.sample, status:meta.status, sex:meta.sex, id:meta.sample, num_intervals:meta.num_intervals]
+ new_meta = meta.tumor_id ? [
+ id: meta.tumor_id + "_vs_" + meta.normal_id,
+ normal_id: meta.normal_id,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sex: meta.sex,
+ tumor_id: meta.tumor_id,
+ ] // not annotated, so no variantcaller necessary
+ : [
+ id: meta.sample,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sample: meta.sample,
+ status: meta.status,
+ sex: meta.sex,
+ ]
[groupKey(new_meta, meta.num_intervals), pileup]
}
.groupTuple(sort:true))
diff --git a/subworkflows/nf-core/variantcalling/strelka/single/main.nf b/subworkflows/nf-core/variantcalling/strelka/single/main.nf
index 2bf8baa155..bc6b0b70f9 100644
--- a/subworkflows/nf-core/variantcalling/strelka/single/main.nf
+++ b/subworkflows/nf-core/variantcalling/strelka/single/main.nf
@@ -29,7 +29,14 @@ workflow RUN_STRELKA_SINGLE {
MERGE_STRELKA(
strelka_vcf.intervals
.map{ meta, vcf ->
- new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, sex:meta.sex, id:meta.sample, num_intervals:meta.num_intervals]
+ new_meta = [
+ id: meta.sample,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sample: meta.sample,
+ sex: meta.sex,
+ status: meta.status
+ ]
[groupKey(new_meta, meta.num_intervals), vcf]
}.groupTuple(),
@@ -40,7 +47,14 @@ workflow RUN_STRELKA_SINGLE {
strelka_genome_vcf.intervals
.map{ meta, vcf ->
- [groupKey([patient:meta.patient, sample:meta.sample, status:meta.status, sex:meta.sex, id:meta.sample, num_intervals:meta.num_intervals],
+ [groupKey([
+ id: meta.sample,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sample: meta.sample,
+ sex: meta.sex,
+ status: meta.status,
+ ],
meta.num_intervals),
vcf]
@@ -54,7 +68,15 @@ workflow RUN_STRELKA_SINGLE {
MERGE_STRELKA.out.vcf,
strelka_vcf.no_intervals)
.map{ meta, vcf ->
- [[patient:meta.patient, sample:meta.sample, status:meta.status, sex:meta.sex, id:meta.sample, num_intervals:meta.num_intervals, variantcaller:"strelka"], vcf]
+ [[
+ id: meta.sample,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sample: meta.sample,
+ sex: meta.sex,
+ status: meta.status,
+ variantcaller: "strelka"
+ ],vcf]
}
ch_versions = ch_versions.mix(MERGE_STRELKA.out.versions)
diff --git a/subworkflows/nf-core/variantcalling/strelka/somatic/main.nf b/subworkflows/nf-core/variantcalling/strelka/somatic/main.nf
index 1fcd00267e..8686d3dcb5 100644
--- a/subworkflows/nf-core/variantcalling/strelka/somatic/main.nf
+++ b/subworkflows/nf-core/variantcalling/strelka/somatic/main.nf
@@ -29,7 +29,14 @@ workflow RUN_STRELKA_SOMATIC {
// Only when using intervals
MERGE_STRELKA_SNVS(strelka_vcf_snvs.intervals.map{ meta, vcf ->
- [groupKey([patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, sex:meta.sex, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals],
+ [groupKey([
+ id: meta.tumor_id + "_vs_" + meta.normal_id,
+ normal_id: meta.normal_id,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sex: meta.sex,
+ tumor_id: meta.tumor_id,
+ ],
meta.num_intervals),
vcf]
@@ -38,7 +45,14 @@ workflow RUN_STRELKA_SOMATIC {
MERGE_STRELKA_INDELS(strelka_vcf_indels.intervals.map{ meta, vcf ->
- [groupKey([patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, sex:meta.sex, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals],
+ [groupKey([
+ id: meta.tumor_id + "_vs_" + meta.normal_id,
+ normal_id: meta.normal_id,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sex: meta.sex,
+ tumor_id: meta.tumor_id,
+ ],
meta.num_intervals),
vcf]
}.groupTuple(),
@@ -52,7 +66,15 @@ workflow RUN_STRELKA_SOMATIC {
strelka_vcf_indels.no_intervals
)
.map{ meta, vcf ->
- [[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, sex:meta.sex, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals, variantcaller:"strelka"],
+ [[
+ id: meta.tumor_id + "_vs_" + meta.normal_id,
+ normal_id: meta.normal_id,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sex: meta.sex,
+ tumor_id: meta.tumor_id,
+ variantcaller: "strelka"
+ ],
vcf]
}
diff --git a/subworkflows/nf-core/variantcalling/tiddit/main.nf b/subworkflows/nf-core/variantcalling/tiddit/main.nf
deleted file mode 100644
index d4fc7eca2a..0000000000
--- a/subworkflows/nf-core/variantcalling/tiddit/main.nf
+++ /dev/null
@@ -1,35 +0,0 @@
-include { TABIX_BGZIPTABIX as TABIX_BGZIP_TIDDIT_SV } from '../../../../modules/nf-core/modules/tabix/bgziptabix/main'
-include { TIDDIT_SV } from '../../../../modules/nf-core/modules/tiddit/sv/main'
-
-workflow RUN_TIDDIT {
- take:
- cram_recalibrated
- fasta
- bwa
-
- main:
-
- ch_versions = Channel.empty()
- TIDDIT_SV(
- cram_recalibrated,
- fasta,
- bwa
- )
-
- TABIX_BGZIP_TIDDIT_SV(TIDDIT_SV.out.vcf)
- tiddit_ploidy = TIDDIT_SV.out.ploidy
- tiddit_vcf_gz = TABIX_BGZIP_TIDDIT_SV.out.gz_tbi.map{ meta, gz, tbi ->
-
- new_meta = meta.tumor_id ? [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, sex:meta.sex, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals, variantcaller:'tiddit']
- : [patient:meta.patient, sample:meta.sample, status:meta.status, sex:meta.sex, id:meta.sample, num_intervals:meta.num_intervals, variantcaller:'tiddit']
- [new_meta, gz]}
-
- ch_versions = ch_versions.mix(TABIX_BGZIP_TIDDIT_SV.out.versions)
- ch_versions = ch_versions.mix(TIDDIT_SV.out.versions)
-
- emit:
- versions = ch_versions
-
- tiddit_vcf = tiddit_vcf_gz
- tiddit_ploidy
-}
diff --git a/subworkflows/nf-core/variantcalling/tiddit/single/main.nf b/subworkflows/nf-core/variantcalling/tiddit/single/main.nf
new file mode 100644
index 0000000000..1615a874e2
--- /dev/null
+++ b/subworkflows/nf-core/variantcalling/tiddit/single/main.nf
@@ -0,0 +1,51 @@
+include { TABIX_BGZIPTABIX as TABIX_BGZIP_TIDDIT_SV } from '../../../../../modules/nf-core/modules/tabix/bgziptabix/main'
+include { TIDDIT_SV } from '../../../../../modules/nf-core/modules/tiddit/sv/main'
+
+workflow RUN_TIDDIT {
+ take:
+ cram_recalibrated
+ fasta
+ bwa
+
+ main:
+
+ ch_versions = Channel.empty()
+ TIDDIT_SV(
+ cram_recalibrated,
+ fasta,
+ bwa
+ )
+
+ TABIX_BGZIP_TIDDIT_SV(TIDDIT_SV.out.vcf)
+ tiddit_ploidy = TIDDIT_SV.out.ploidy
+ tiddit_vcf_gz = TABIX_BGZIP_TIDDIT_SV.out.gz_tbi.map{ meta, gz, tbi ->
+
+ new_meta = meta.tumor_id ? [
+ id: meta.tumor_id + "_vs_" + meta.normal_id,
+ normal_id: meta.normal_id,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sex: meta.sex,
+ tumor_id: meta.tumor_id,
+ variantcaller: 'tiddit'
+ ]
+ : [
+ id: meta.sample,
+ num_intervals: meta.num_intervals,
+ patient: meta.patient,
+ sample: meta.sample,
+ sex: meta.sex,
+ status: meta.status,
+ variantcaller: 'tiddit'
+ ]
+ [new_meta, gz]}
+
+ ch_versions = ch_versions.mix(TABIX_BGZIP_TIDDIT_SV.out.versions)
+ ch_versions = ch_versions.mix(TIDDIT_SV.out.versions)
+
+ emit:
+ versions = ch_versions
+
+ tiddit_vcf = tiddit_vcf_gz
+ tiddit_ploidy
+}
diff --git a/subworkflows/nf-core/variantcalling/tiddit/tiddit_somatic/main.nf b/subworkflows/nf-core/variantcalling/tiddit/somatic/main.nf
similarity index 85%
rename from subworkflows/nf-core/variantcalling/tiddit/tiddit_somatic/main.nf
rename to subworkflows/nf-core/variantcalling/tiddit/somatic/main.nf
index 73d0e36511..89531819ad 100644
--- a/subworkflows/nf-core/variantcalling/tiddit/tiddit_somatic/main.nf
+++ b/subworkflows/nf-core/variantcalling/tiddit/somatic/main.nf
@@ -1,5 +1,5 @@
-include { RUN_TIDDIT as RUN_TIDDIT_NORMAL } from '../main.nf'
-include { RUN_TIDDIT as RUN_TIDDIT_TUMOR } from '../main.nf'
+include { RUN_TIDDIT as RUN_TIDDIT_NORMAL } from '../single/main.nf'
+include { RUN_TIDDIT as RUN_TIDDIT_TUMOR } from '../single/main.nf'
include { SVDB_MERGE } from '../../../../../modules/nf-core/modules/svdb/merge/main.nf'
workflow RUN_TIDDIT_SOMATIC {
@@ -12,8 +12,10 @@ workflow RUN_TIDDIT_SOMATIC {
main:
ch_versions = Channel.empty()
+
RUN_TIDDIT_NORMAL(cram_normal, fasta, bwa)
RUN_TIDDIT_TUMOR(cram_tumor, fasta, bwa)
+
SVDB_MERGE(RUN_TIDDIT_NORMAL.out.tiddit_vcf.join(RUN_TIDDIT_TUMOR.out.tiddit_vcf)
.map{meta, vcf_normal, vcf_tumor ->
[meta, [vcf_normal, vcf_tumor]]
diff --git a/tests/config/nextflow.config b/tests/config/nextflow.config
deleted file mode 100644
index 74e1c2d513..0000000000
--- a/tests/config/nextflow.config
+++ /dev/null
@@ -1,35 +0,0 @@
-params {
- outdir = "output/"
- publish_dir_mode = "copy"
- enable_conda = false
- singularity_pull_docker_container = false
- max_cpus = 2
- max_memory = 6.GB
- max_time = 6.h
-}
-
-process {
- cpus = 2
- memory = 6.GB
- time = 48.h
-}
-
-if ("$PROFILE" == "singularity") {
- singularity.enabled = true
- singularity.autoMounts = true
-} else if ("$PROFILE" == "conda") {
- params.enable_conda = true
-} else {
- docker.enabled = true
- docker.runOptions = '-u \$(id -u):\$(id -g)'
-}
-
-// Load test_data.config containing paths to test data
-includeConfig 'test_data.config'
-
-// Load modules.config for default module params
-includeConfig '../../conf/modules.config'
-
-manifest {
- nextflowVersion = '!>=21.10.3'
-}
diff --git a/tests/config/pytest_software.yml b/tests/config/pytest_software.yml
deleted file mode 100644
index b215cffcc6..0000000000
--- a/tests/config/pytest_software.yml
+++ /dev/null
@@ -1,4 +0,0 @@
-markduplicates:
- - modules/nf-core/software/gatk4/markduplicates/main.nf
- - subworkflow/local/markduplicates.nf
- - tests/subworkflow/local/markduplicates/**
diff --git a/tests/config/test_data.config b/tests/config/test_data.config
deleted file mode 100644
index 665947a594..0000000000
--- a/tests/config/test_data.config
+++ /dev/null
@@ -1,15 +0,0 @@
-def test_data_dir = "${launchDir}/tests/data/"
-def nf_core_modules_data = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/"
-
-try {
- includeConfig "https://raw.githubusercontent.com/nf-core/modules/master/tests/config/test_data.config"
-} catch (Exception e) {
- System.err.println("WARNING: Could not load nf-core/modules test data config")
-}
-
-params {
- test_data {
- 'external' {
- }
- }
-}
diff --git a/tests/subworkflows/local/annotate/main.nf b/tests/subworkflows/local/annotate/main.nf
deleted file mode 100644
index a1138bd994..0000000000
--- a/tests/subworkflows/local/annotate/main.nf
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/usr/bin/env nextflow
-
-nextflow.enable.dsl = 2
-
-// Don't overwrite global params.modules, create a copy instead and use that within the main script.
-def modules = params.modules.clone()
-
-include { ANNOTATE } from '../../../../subworkflows/local/annotate' addParams(
- annotation_cache: false,
- bgziptabix_merge_vep_options: modules['bgziptabix_merge_vep'],
- bgziptabix_snpeff_options: modules['bgziptabix_snpeff'],
- bgziptabix_vep_options: modules['bgziptabix_vep'],
- merge_vep_options: modules['merge_vep'],
- snpeff_options: modules['snpeff'],
- snpeff_tag: "${modules['snpeff'].tag_base}.WBcel235",
- vep_options: modules['vep'],
- vep_tag: "${modules['vep'].tag_base}.WBcel235"
-)
-
-workflow test_annotate {
- input = [[id: 'test'],
- [file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)]]
-
- ANNOTATE(
- input,
- ["snpeff","vep","merge"],
- "WBcel235.99",
- [],
- "WBcel235",
- "caenorhabditis_elegans",
- "104",
- [])
-}
diff --git a/tests/subworkflows/local/annotate/test.yml b/tests/subworkflows/local/annotate/test.yml
deleted file mode 100644
index a0f9caf99b..0000000000
--- a/tests/subworkflows/local/annotate/test.yml
+++ /dev/null
@@ -1,11 +0,0 @@
-- name: subworkflow annotate
- command: nextflow run ./tests/subworkflows/local/annotate/ -entry test_annotate -c tests/config/nextflow.config
- tags:
- - annotate
- files:
- - path: output/annotation/test/test_snpEff.ann.gz
- - path: output/annotation/test/test_snpEff.ann.gz.tbi
- - path: output/annotation/test/test_snpEff_VEP.ann.gz
- - path: output/annotation/test/test_snpEff_VEP.ann.gz.tbi
- - path: output/annotation/test/test_VEP.ann.gz
- - path: output/annotation/test/test_VEP.ann.gz.tbi
diff --git a/tests/subworkflows/nf-core/markduplicates/main.nf b/tests/subworkflows/nf-core/markduplicates/main.nf
deleted file mode 100644
index 2e61829e0a..0000000000
--- a/tests/subworkflows/nf-core/markduplicates/main.nf
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/usr/bin/env nextflow
-
-nextflow.enable.dsl = 2
-
-// Don't overwrite global params.modules, create a copy instead and use that within the main script.
-def modules = params.modules.clone()
-
-include { MARKDUPLICATES } from '../../../../subworkflows/nf-core/markduplicates' addParams(
- markduplicates_options: modules['markduplicates'],
- markduplicatesspark_options: modules['markduplicatesspark']
-)
-
-workflow test_markduplicates {
- input = [[id: 'test'],
- [file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)],
- [file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)]]
-
- MARKDUPLICATES ( input, false, true )
-}
diff --git a/tests/subworkflows/nf-core/markduplicates/test.yml b/tests/subworkflows/nf-core/markduplicates/test.yml
deleted file mode 100644
index 35deb59d70..0000000000
--- a/tests/subworkflows/nf-core/markduplicates/test.yml
+++ /dev/null
@@ -1,8 +0,0 @@
-- name: subworkflow markduplicates
- command: nextflow run ./tests/subworkflows/nf-core/markduplicates/ -entry test_markduplicates -c tests/config/nextflow.config
- tags:
- - markduplicates_subworkflow
- - gatk4
- files:
- - path: output/preprocessing/test/markduplicates/test.md.bam
- - path: output/preprocessing/test/markduplicates/test.md.bam.bai
diff --git a/tests/subworkflows/nf-core/snpeff_annotate/main.nf b/tests/subworkflows/nf-core/snpeff_annotate/main.nf
deleted file mode 100644
index 56af76c5e6..0000000000
--- a/tests/subworkflows/nf-core/snpeff_annotate/main.nf
+++ /dev/null
@@ -1,23 +0,0 @@
-#!/usr/bin/env nextflow
-
-nextflow.enable.dsl = 2
-
-// Don't overwrite global params.modules, create a copy instead and use that within the main script.
-def modules = params.modules.clone()
-
-include { SNPEFF_ANNOTATE } from '../../../../subworkflows/nf-core/snpeff_annotate' addParams(
- bgziptabix_snpeff_options: modules['bgziptabix_snpeff'],
- snpeff_options: modules['snpeff'],
- snpeff_tag: "${modules['snpeff'].tag_base}.WBcel235",
- use_cache: false
-)
-
-workflow test_snpeff_annotate {
- input = [[id: 'test'],
- [file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)]]
-
- SNPEFF_ANNOTATE (
- input,
- "WBcel235.99",
- [])
-}
diff --git a/tests/subworkflows/nf-core/snpeff_annotate/test.yml b/tests/subworkflows/nf-core/snpeff_annotate/test.yml
deleted file mode 100644
index 6f7f4938c6..0000000000
--- a/tests/subworkflows/nf-core/snpeff_annotate/test.yml
+++ /dev/null
@@ -1,7 +0,0 @@
-- name: subworkflow snpeff_annotate
- command: nextflow run ./tests/subworkflows/nf-core/snpeff_annotate/ -entry test_snpeff_annotate -c tests/config/nextflow.config
- tags:
- - snpeff_annotate
- files:
- - path: output/annotation/test/test_snpEff.ann.gz
- - path: output/annotation/test/test_snpEff.ann.gz.tbi
diff --git a/tests/subworkflows/nf-core/vep_annotate/main.nf b/tests/subworkflows/nf-core/vep_annotate/main.nf
deleted file mode 100644
index 268180ef3c..0000000000
--- a/tests/subworkflows/nf-core/vep_annotate/main.nf
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/usr/bin/env nextflow
-
-nextflow.enable.dsl = 2
-
-// Don't overwrite global params.modules, create a copy instead and use that within the main script.
-def modules = params.modules.clone()
-
-include { VEP_ANNOTATE } from '../../../../subworkflows/nf-core/vep_annotate' addParams(
- bgziptabix_vep_options: modules['bgziptabix_vep'],
- use_cache: false,
- vep_options: modules['vep'],
- vep_tag: "${modules['vep'].tag_base}.WBcel235"
-)
-
-workflow test_vep_annotate {
- input = [[id: 'test'],
- [file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)]]
-
- VEP_ANNOTATE (
- input,
- "WBcel235",
- "caenorhabditis_elegans",
- "104",
- [])
-}
diff --git a/tests/subworkflows/nf-core/vep_annotate/test.yml b/tests/subworkflows/nf-core/vep_annotate/test.yml
deleted file mode 100644
index 78c4f47439..0000000000
--- a/tests/subworkflows/nf-core/vep_annotate/test.yml
+++ /dev/null
@@ -1,7 +0,0 @@
-- name: subworkflow vep_annotate
- command: nextflow run ./tests/subworkflows/nf-core/vep_annotate/ -entry test_vep_annotate -c tests/config/nextflow.config
- tags:
- - vep_annotate
- files:
- - path: output/annotation/test/test_VEP.ann.gz
- - path: output/annotation/test/test_VEP.ann.gz.tbi
diff --git a/workflows/sarek.nf b/workflows/sarek.nf
index 3f67b2348c..ed1a4f397f 100644
--- a/workflows/sarek.nf
+++ b/workflows/sarek.nf
@@ -60,7 +60,7 @@ if (params.wes && !params.step == 'annotate') {
else log.warn("Intervals file was provided without parameter `--wes`: Pipeline will assume this is Whole-Genome-Sequencing data.")
} else if (params.intervals && !params.intervals.endsWith("bed") && !params.intervals.endsWith("interval_list")) exit 1, "Intervals file must end with .bed or .interval_list"
-if(params.step == 'mapping' && params.aligner.contains("dragmap") && !(params.skip_tools && params.skip_tools.contains("baserecalibrator"))){
+if(params.step == 'mapping' && params.aligner.contains("dragmap") && !(params.skip_tools && params.skip_tools.split(',').contains("baserecalibrator"))){
log.warn("DragMap was specified as aligner. Base recalibration is not contained in --skip_tools. It is recommended to skip baserecalibration when using DragMap\nhttps://gatk.broadinstitute.org/hc/en-us/articles/4407897446939--How-to-Run-germline-single-sample-short-variant-discovery-in-DRAGEN-mode")
}
@@ -180,7 +180,7 @@ if (params.spliceai_snv && params.spliceai_snv_tbi && params.spliceai_indel && p
}
// Initialize value channels based on params, not defined within the params.genomes[params.genome] scope
-umi_read_structure = params.umi_read_structure ? "${params.umi_read_structure} ${params.umi_read_structure}" : Channel.empty()
+umi_read_structure = params.umi_read_structure ? "${params.umi_read_structure}" : Channel.empty()
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -371,6 +371,7 @@ workflow SAREK {
}.set{ch_input_sample_type}
// convert any bam input to fastq
+ // Fasta are not needed when converting bam to fastq -> []
ALIGNMENT_TO_FASTQ_INPUT(ch_input_sample_type.bam, [])
// gather fastq (inputed or converted)
@@ -388,17 +389,19 @@ workflow SAREK {
if (!(params.skip_tools && params.skip_tools.split(',').contains('fastqc'))) {
RUN_FASTQC(ch_input_fastq)
- ch_reports = ch_reports.mix(RUN_FASTQC.out.fastqc_zip.collect{it[1]}.ifEmpty([]))
+ ch_reports = ch_reports.mix(RUN_FASTQC.out.fastqc_zip.collect{meta, logs -> logs})
ch_versions = ch_versions.mix(RUN_FASTQC.out.versions)
}
// UMI consensus calling
if (params.umi_read_structure) {
- CREATE_UMI_CONSENSUS(ch_input_fastq,
+ CREATE_UMI_CONSENSUS(
+ ch_input_fastq,
fasta,
ch_map_index,
umi_read_structure,
- params.group_by_umi_strategy)
+ params.group_by_umi_strategy
+ )
bamtofastq = CREATE_UMI_CONSENSUS.out.consensusbam.map{meta, bam -> [meta,bam,[]]}
@@ -416,15 +419,31 @@ workflow SAREK {
// Trimming and/or splitting
if (params.trim_fastq || params.split_fastq > 0) {
- FASTP(ch_reads_fastp, false, false)
- ch_reports = ch_reports.mix(FASTP.out.json.collect{it[1]}.ifEmpty([]),FASTP.out.html.collect{it[1]}.ifEmpty([]))
+ save_trimmed_fail = false
+ save_merged = false
+ FASTP(ch_reads_fastp, save_trimmed_fail, save_merged)
+
+ ch_reports = ch_reports.mix(
+ FASTP.out.json.collect{meta, json -> json},
+ FASTP.out.html.collect{meta, html -> html}
+ )
if(params.split_fastq){
ch_reads_to_map = FASTP.out.reads.map{ key, reads ->
read_files = reads.sort{ a,b -> a.getName().tokenize('.')[0] <=> b.getName().tokenize('.')[0] }.collate(2)
- [[patient: key.patient, sample:key.sample, sex:key.sex, status:key.status, id:key.id, numLanes:key.numLanes, read_group:key.read_group, data_type:key.data_type, size:read_files.size()],
+ [[
+ data_type:key.data_type,
+ id:key.id,
+ numLanes:key.numLanes,
+ patient: key.patient,
+ read_group:key.read_group,
+ sample:key.sample,
+ sex:key.sex,
+ size:read_files.size(),
+ status:key.status,
+ ],
read_files]
}.transpose()
}else{
@@ -442,11 +461,22 @@ workflow SAREK {
// update ID when no multiple lanes or splitted fastqs
new_id = meta.size * meta.numLanes == 1 ? meta.sample : meta.id
- [[patient:meta.patient, sample:meta.sample, sex:meta.sex, status:meta.status, id:new_id, numLanes:meta.numLanes, read_group:meta.read_group, data_type:meta.data_type, size:meta.size],
+ [[
+ data_type: meta.data_type,
+ id: new_id,
+ numLanes: meta.numLanes,
+ patient: meta.patient,
+ read_group: meta.read_group,
+ sample: meta.sample,
+ sex: meta.sex,
+ size: meta.size,
+ status: meta.status,
+ ],
reads]
}
- GATK4_MAPPING(ch_reads_to_map, ch_map_index, true)
+ sort_bam = true
+ GATK4_MAPPING(ch_reads_to_map, ch_map_index, sort_bam)
// Grouping the bams from the same samples not to stall the workflow
ch_bam_mapped = GATK4_MAPPING.out.bam.map{ meta, bam ->
@@ -459,7 +489,15 @@ workflow SAREK {
// read_group: Now in the BAM header
// numLanes: Was only needed for mapping
// size: Was only needed for mapping
- new_meta = [patient:meta.patient, sample:meta.sample, sex:meta.sex, status:meta.status, id:meta.sample, data_type:"bam"]
+ new_meta = [
+ id:meta.sample,
+ data_type:"bam",
+ patient:meta.patient,
+ sample:meta.sample,
+ sex:meta.sex,
+ status:meta.status,
+ ]
+
// Use groupKey to make sure that the correct group can advance as soon as it is complete
// and not stall the workflow until all reads from all channels are mapped
[ groupKey(new_meta, numLanes * size), bam]
@@ -506,19 +544,19 @@ workflow SAREK {
ch_input_sample.branch{
bam: it[0].data_type == "bam"
cram: it[0].data_type == "cram"
- }.set{convert}
+ }.set{ch_convert}
- ch_bam_for_markduplicates = convert.bam.map{ meta, bam, bai -> [meta, bam]}
+ ch_bam_for_markduplicates = ch_convert.bam.map{ meta, bam, bai -> [meta, bam]}
//In case Markduplicates is run convert CRAM files to BAM, because the tool only runs on BAM files. MD_SPARK does run on CRAM but is a lot slower
if (!(params.skip_tools && params.skip_tools.split(',').contains('markduplicates'))){
- SAMTOOLS_CRAMTOBAM(convert.cram, fasta, fasta_fai)
+ SAMTOOLS_CRAMTOBAM(ch_convert.cram, fasta, fasta_fai)
ch_versions = ch_versions.mix(SAMTOOLS_CRAMTOBAM.out.versions)
ch_bam_for_markduplicates = ch_bam_for_markduplicates.mix(SAMTOOLS_CRAMTOBAM.out.alignment_index.map{ meta, bam, bai -> [meta, bam]})
} else {
- ch_input_cram_indexed = convert.cram
+ ch_input_cram_indexed = ch_convert.cram
}
}
@@ -527,9 +565,10 @@ workflow SAREK {
// ch_bam_indexed will countain bam mapped with GATK4_MAPPING when step is mapping
// which are then merged and indexed
// Or bams that are specified in the samplesheet.csv when step is prepare_recalibration
- ch_bam_indexed = params.step == 'mapping' ? MERGE_INDEX_BAM.out.bam_bai : convert.bam
+ ch_bam_indexed = params.step == 'mapping' ? MERGE_INDEX_BAM.out.bam_bai : ch_convert.bam
- BAM_TO_CRAM(ch_bam_indexed,
+ BAM_TO_CRAM(
+ ch_bam_indexed,
ch_input_cram_indexed,
fasta,
fasta_fai,
@@ -538,12 +577,13 @@ workflow SAREK {
ch_cram_no_markduplicates_restart = BAM_TO_CRAM.out.cram_converted
// Gather QC reports
- ch_reports = ch_reports.mix(BAM_TO_CRAM.out.qc.collect{it[1]}.ifEmpty([]))
+ ch_reports = ch_reports.mix(BAM_TO_CRAM.out.qc.collect{meta, report -> report})
// Gather used softwares versions
ch_versions = ch_versions.mix(BAM_TO_CRAM.out.versions)
} else if (params.use_gatk_spark && params.use_gatk_spark.contains('markduplicates')) {
- MARKDUPLICATES_SPARK(ch_bam_for_markduplicates,
+ MARKDUPLICATES_SPARK(
+ ch_bam_for_markduplicates,
dict,
fasta,
fasta_fai,
@@ -551,12 +591,13 @@ workflow SAREK {
ch_cram_markduplicates_spark = MARKDUPLICATES_SPARK.out.cram
// Gather QC reports
- ch_reports = ch_reports.mix(MARKDUPLICATES_SPARK.out.qc.collect{it[1]}.ifEmpty([]))
+ ch_reports = ch_reports.mix(MARKDUPLICATES_SPARK.out.qc.collect{meta, report -> report})
// Gather used softwares versions
ch_versions = ch_versions.mix(MARKDUPLICATES_SPARK.out.versions)
} else {
- MARKDUPLICATES(ch_bam_for_markduplicates,
+ MARKDUPLICATES(
+ ch_bam_for_markduplicates,
fasta,
fasta_fai,
intervals_for_preprocessing)
@@ -564,7 +605,7 @@ workflow SAREK {
ch_cram_markduplicates_no_spark = MARKDUPLICATES.out.cram
// Gather QC reports
- ch_reports = ch_reports.mix(MARKDUPLICATES.out.qc.collect{it[1]}.ifEmpty([]))
+ ch_reports = ch_reports.mix(MARKDUPLICATES.out.qc.collect{meta, report -> report})
// Gather used softwares versions
ch_versions = ch_versions.mix(MARKDUPLICATES.out.versions)
@@ -579,14 +620,20 @@ workflow SAREK {
ch_cram_markduplicates_spark,
ch_cram_no_markduplicates_restart).map{ meta, cram, crai ->
//Make sure correct data types are carried through
- [[patient:meta.patient, sample:meta.sample, sex:meta.sex, status:meta.status, id:meta.id, data_type:"cram"], cram, crai]
+ [[
+ data_type: "cram",
+ id: meta.id,
+ patient: meta.patient,
+ sample: meta.sample,
+ sex: meta.sex,
+ status: meta.status
+ ],
+ cram, crai]
}
- // CSV should be written for the file actually out out, either CRAM or BAM
- csv_markduplicates = ch_md_cram_for_restart
-
+ // CSV should be written for the file actually out, either CRAM or BAM
// Create CSV to restart from this step
- if (!(params.skip_tools && params.skip_tools.split(',').contains('markduplicates'))) MARKDUPLICATES_CSV(csv_markduplicates)
+ if (!(params.skip_tools && params.skip_tools.split(',').contains('markduplicates'))) MARKDUPLICATES_CSV(ch_md_cram_for_restart)
}
if (params.step in ['mapping', 'markduplicates', 'prepare_recalibration']) {
@@ -598,13 +645,13 @@ workflow SAREK {
ch_input_sample.branch{
bam: it[0].data_type == "bam"
cram: it[0].data_type == "cram"
- }.set{convert}
+ }.set{ch_convert}
//BAM files first must be converted to CRAM files since from this step on we base everything on CRAM format
- SAMTOOLS_BAMTOCRAM(convert.bam, fasta, fasta_fai)
+ SAMTOOLS_BAMTOCRAM(ch_convert.bam, fasta, fasta_fai)
ch_versions = ch_versions.mix(SAMTOOLS_BAMTOCRAM.out.versions)
- ch_cram_for_prepare_recalibration = Channel.empty().mix(SAMTOOLS_BAMTOCRAM.out.alignment_index, convert.cram)
+ ch_cram_for_prepare_recalibration = Channel.empty().mix(SAMTOOLS_BAMTOCRAM.out.alignment_index, ch_convert.cram)
ch_md_cram_for_restart = SAMTOOLS_BAMTOCRAM.out.alignment_index
@@ -625,7 +672,8 @@ workflow SAREK {
ch_table_bqsr_spark = Channel.empty()
if (params.use_gatk_spark && params.use_gatk_spark.contains('baserecalibrator')) {
- PREPARE_RECALIBRATION_SPARK(ch_cram_for_prepare_recalibration,
+ PREPARE_RECALIBRATION_SPARK(
+ ch_cram_for_prepare_recalibration,
dict,
fasta,
fasta_fai,
@@ -639,7 +687,8 @@ workflow SAREK {
ch_versions = ch_versions.mix(PREPARE_RECALIBRATION_SPARK.out.versions)
} else {
- PREPARE_RECALIBRATION(ch_cram_for_prepare_recalibration,
+ PREPARE_RECALIBRATION(
+ ch_cram_for_prepare_recalibration,
dict,
fasta,
fasta_fai,
@@ -660,7 +709,7 @@ workflow SAREK {
ch_table_bqsr_no_spark,
ch_table_bqsr_spark)
- ch_reports = ch_reports.mix(ch_table_bqsr.map{ meta, table -> table})
+ ch_reports = ch_reports.mix(ch_table_bqsr.collect{ meta, table -> table})
ch_cram_applybqsr = ch_cram_for_prepare_recalibration.join(ch_table_bqsr)
@@ -679,18 +728,19 @@ workflow SAREK {
ch_input_sample.branch{
bam: it[0].data_type == "bam"
cram: it[0].data_type == "cram"
- }.set{convert}
+ }.set{ch_convert}
//If BAM file, split up table and mapped file to convert BAM to CRAM
- ch_bam_table = convert.bam.map{ meta, bam, bai, table -> [meta, table]}
- ch_bam_bam = convert.bam.map{ meta, bam, bai, table -> [meta, bam, bai]}
+ ch_bam_table = ch_convert.bam.map{ meta, bam, bai, table -> [meta, table]}
+ ch_bam_bam = ch_convert.bam.map{ meta, bam, bai, table -> [meta, bam, bai]}
//BAM files first must be converted to CRAM files since from this step on we base everything on CRAM format
SAMTOOLS_BAMTOCRAM(ch_bam_bam, fasta, fasta_fai)
ch_versions = ch_versions.mix(SAMTOOLS_BAMTOCRAM.out.versions)
- ch_cram_applybqsr = Channel.empty().mix(SAMTOOLS_BAMTOCRAM.out.alignment_index.join(ch_bam_table), // Join together converted cram with input tables
- convert.cram)
+ ch_cram_applybqsr = Channel.empty().mix(
+ SAMTOOLS_BAMTOCRAM.out.alignment_index.join(ch_bam_table),
+ ch_convert.cram) // Join together converted cram with input tables
}
if (!(params.skip_tools && params.skip_tools.split(',').contains('baserecalibrator'))) {
@@ -699,7 +749,8 @@ workflow SAREK {
if (params.use_gatk_spark && params.use_gatk_spark.contains('baserecalibrator')) {
- RECALIBRATE_SPARK(ch_cram_applybqsr,
+ RECALIBRATE_SPARK(
+ ch_cram_applybqsr,
dict,
fasta,
fasta_fai,
@@ -712,7 +763,8 @@ workflow SAREK {
} else {
- RECALIBRATE(ch_cram_applybqsr,
+ RECALIBRATE(
+ ch_cram_applybqsr,
dict,
fasta,
fasta_fai,
@@ -723,28 +775,29 @@ workflow SAREK {
// Gather used softwares versions
ch_versions = ch_versions.mix(RECALIBRATE.out.versions)
}
- cram_variant_calling = Channel.empty().mix(
+ ch_cram_variant_calling = Channel.empty().mix(
ch_cram_variant_calling_no_spark,
ch_cram_variant_calling_spark)
- CRAM_QC(cram_variant_calling,
+ CRAM_QC(
+ ch_cram_variant_calling,
fasta,
fasta_fai,
intervals_for_preprocessing)
// Gather QC reports
- ch_reports = ch_reports.mix(CRAM_QC.out.qc.collect{it[1]}.ifEmpty([]))
+ ch_reports = ch_reports.mix(CRAM_QC.out.qc.collect{meta, report -> report})
// Gather used softwares versions
ch_versions = ch_versions.mix(CRAM_QC.out.versions)
//If params.save_output_as_bam, then convert CRAM files to BAM
- SAMTOOLS_CRAMTOBAM_RECAL(cram_variant_calling, fasta, fasta_fai)
+ SAMTOOLS_CRAMTOBAM_RECAL(ch_cram_variant_calling, fasta, fasta_fai)
ch_versions = ch_versions.mix(SAMTOOLS_CRAMTOBAM_RECAL.out.versions)
// CSV should be written for the file actually out out, either CRAM or BAM
csv_recalibration = Channel.empty()
- csv_recalibration = params.save_output_as_bam ? SAMTOOLS_CRAMTOBAM_RECAL.out.alignment_index : cram_variant_calling
+ csv_recalibration = params.save_output_as_bam ? SAMTOOLS_CRAMTOBAM_RECAL.out.alignment_index : ch_cram_variant_calling
// Create CSV to restart from this step
RECALIBRATE_CSV(csv_recalibration)
@@ -754,12 +807,12 @@ workflow SAREK {
// ch_cram_variant_calling contains either:
// - input bams converted to crams, if started from step recal + skip BQSR
// - input crams if started from step recal + skip BQSR
- cram_variant_calling = Channel.empty().mix(SAMTOOLS_BAMTOCRAM.out.alignment_index,
- convert.cram.map{ meta, cram, crai, table -> [meta, cram, crai]})
+ ch_cram_variant_calling = Channel.empty().mix(SAMTOOLS_BAMTOCRAM.out.alignment_index,
+ ch_convert.cram.map{ meta, cram, crai, table -> [meta, cram, crai]})
} else {
// ch_cram_variant_calling contains either:
// - crams from markduplicates = ch_cram_for_prepare_recalibration if skip BQSR but not started from step recalibration
- cram_variant_calling = Channel.empty().mix(ch_cram_for_prepare_recalibration)
+ ch_cram_variant_calling = Channel.empty().mix(ch_cram_for_prepare_recalibration)
}
}
@@ -768,69 +821,69 @@ workflow SAREK {
ch_input_sample.branch{
bam: it[0].data_type == "bam"
cram: it[0].data_type == "cram"
- }.set{convert}
+ }.set{ch_convert}
//BAM files first must be converted to CRAM files since from this step on we base everything on CRAM format
- SAMTOOLS_BAMTOCRAM_VARIANTCALLING(convert.bam, fasta, fasta_fai)
+ SAMTOOLS_BAMTOCRAM_VARIANTCALLING(ch_convert.bam, fasta, fasta_fai)
ch_versions = ch_versions.mix(SAMTOOLS_BAMTOCRAM_VARIANTCALLING.out.versions)
- cram_variant_calling = Channel.empty().mix(SAMTOOLS_BAMTOCRAM_VARIANTCALLING.out.alignment_index, convert.cram)
+ ch_cram_variant_calling = Channel.empty().mix(SAMTOOLS_BAMTOCRAM_VARIANTCALLING.out.alignment_index, ch_convert.cram)
}
if (params.tools) {
- if (params.step == 'annotate') cram_variant_calling = Channel.empty()
+ if (params.step == 'annotate') ch_cram_variant_calling = Channel.empty()
//
// Logic to separate germline samples, tumor samples with no matched normal, and combine tumor-normal pairs
//
- cram_variant_calling.branch{
+ ch_cram_variant_calling.branch{
normal: it[0].status == 0
tumor: it[0].status == 1
- }.set{cram_variant_calling_status}
+ }.set{ch_cram_variant_calling_status}
// All Germline samples
- cram_variant_calling_normal_to_cross = cram_variant_calling_status.normal.map{ meta, cram, crai -> [meta.patient, meta, cram, crai] }
+ ch_cram_variant_calling_normal_to_cross = ch_cram_variant_calling_status.normal.map{ meta, cram, crai -> [meta.patient, meta, cram, crai] }
// All tumor samples
- cram_variant_calling_pair_to_cross = cram_variant_calling_status.tumor.map{ meta, cram, crai -> [meta.patient, meta, cram, crai] }
+ ch_cram_variant_calling_pair_to_cross = ch_cram_variant_calling_status.tumor.map{ meta, cram, crai -> [meta.patient, meta, cram, crai] }
// Tumor only samples
// 1. Group together all tumor samples by patient ID [patient1, [meta1, meta2], [cram1,crai1, cram2, crai2]]
// Downside: this only works by waiting for all tumor samples to finish preprocessing, since no group size is provided
- cram_variant_calling_tumor_grouped = cram_variant_calling_pair_to_cross.groupTuple()
+ ch_cram_variant_calling_tumor_grouped = ch_cram_variant_calling_pair_to_cross.groupTuple()
// 2. Join with normal samples, in each channel there is one key per patient now. Patients without matched normal end up with: [patient1, [meta1, meta2], [cram1,crai1, cram2, crai2], null]
- cram_variant_calling_tumor_joined = cram_variant_calling_tumor_grouped.join(cram_variant_calling_normal_to_cross, remainder: true)
+ ch_cram_variant_calling_tumor_joined = ch_cram_variant_calling_tumor_grouped.join(ch_cram_variant_calling_normal_to_cross, remainder: true)
// 3. Filter out entries with last entry null
- cram_variant_calling_tumor_filtered = cram_variant_calling_tumor_joined.filter{ it -> !(it.last()) }
+ ch_cram_variant_calling_tumor_filtered = ch_cram_variant_calling_tumor_joined.filter{ it -> !(it.last()) }
// 4. Transpose [patient1, [meta1, meta2], [cram1,crai1, cram2, crai2]] back to [patient1, meta1, [cram1,crai1], null] [patient1, meta2, [cram2,crai2], null]
// and remove patient ID field & null value for further processing [meta1, [cram1,crai1]] [meta2, [cram2,crai2]]
- cram_variant_calling_tumor_only = cram_variant_calling_tumor_filtered.transpose().map{ it -> [it[1], it[2], it[3]] }
+ ch_cram_variant_calling_tumor_only = ch_cram_variant_calling_tumor_filtered.transpose().map{ it -> [it[1], it[2], it[3]] }
if(params.only_paired_variant_calling){
// Normal only samples
// 1. Join with tumor samples, in each channel there is one key per patient now. Patients without matched tumor end up with: [patient1, [meta1], [cram1,crai1], null] as there is only one matched normal possible
- cram_variant_calling_normal_joined = cram_variant_calling_normal_to_cross.join(cram_variant_calling_tumor_grouped, remainder: true)
+ ch_cram_variant_calling_normal_joined = ch_cram_variant_calling_normal_to_cross.join(ch_cram_variant_calling_tumor_grouped, remainder: true)
// 2. Filter out entries with last entry null
- cram_variant_calling_normal_filtered = cram_variant_calling_normal_joined.filter{ it -> !(it.last()) }
+ ch_cram_variant_calling_normal_filtered = ch_cram_variant_calling_normal_joined.filter{ it -> !(it.last()) }
// 3. Remove patient ID field & null value for further processing [meta1, [cram1,crai1]] [meta2, [cram2,crai2]] (no transposing needed since only one normal per patient ID)
- cram_variant_calling_status_normal = cram_variant_calling_normal_filtered.map{ it -> [it[1], it[2], it[3]] }
+ ch_cram_variant_calling_status_normal = ch_cram_variant_calling_normal_filtered.map{ it -> [it[1], it[2], it[3]] }
}else{
- cram_variant_calling_status_normal = cram_variant_calling_status.normal
+ ch_cram_variant_calling_status_normal = ch_cram_variant_calling_status.normal
}
// Tumor - normal pairs
// Use cross to combine normal with all tumor samples, i.e. multi tumor samples from recurrences
- cram_variant_calling_pair = cram_variant_calling_normal_to_cross.cross(cram_variant_calling_pair_to_cross)
+ ch_cram_variant_calling_pair = ch_cram_variant_calling_normal_to_cross.cross(ch_cram_variant_calling_pair_to_cross)
.map { normal, tumor ->
def meta = [:]
meta.patient = normal[0]
@@ -845,8 +898,8 @@ workflow SAREK {
// GERMLINE VARIANT CALLING
GERMLINE_VARIANT_CALLING(
params.tools,
- cram_variant_calling_status_normal,
- [],
+ ch_cram_variant_calling_status_normal,
+ [], //bwa_index for tiddit; not used here
dbsnp,
dbsnp_tbi,
dict,
@@ -863,8 +916,8 @@ workflow SAREK {
// TUMOR ONLY VARIANT CALLING
TUMOR_ONLY_VARIANT_CALLING(
params.tools,
- cram_variant_calling_tumor_only,
- [],
+ ch_cram_variant_calling_tumor_only,
+ [], //bwa_index for tiddit; not used here
chr_files,
cnvkit_reference,
dbsnp,
@@ -885,8 +938,8 @@ workflow SAREK {
// PAIR VARIANT CALLING
PAIR_VARIANT_CALLING(
params.tools,
- cram_variant_calling_pair,
- [],
+ ch_cram_variant_calling_pair,
+ [], //bwa_index for tiddit; not used here
chr_files,
dbsnp,
dbsnp_tbi,
@@ -936,10 +989,10 @@ workflow SAREK {
VCF_QC(vcf_to_annotate, intervals_bed_combined)
ch_versions = ch_versions.mix(VCF_QC.out.versions)
- ch_reports = ch_reports.mix(VCF_QC.out.bcftools_stats.collect{it[1]}.ifEmpty([]))
- ch_reports = ch_reports.mix(VCF_QC.out.vcftools_tstv_counts.collect{it[1]}.ifEmpty([]))
- ch_reports = ch_reports.mix(VCF_QC.out.vcftools_tstv_qual.collect{it[1]}.ifEmpty([]))
- ch_reports = ch_reports.mix(VCF_QC.out.vcftools_filter_summary.collect{it[1]}.ifEmpty([]))
+ ch_reports = ch_reports.mix(VCF_QC.out.bcftools_stats.collect{meta, stats -> stats})
+ ch_reports = ch_reports.mix(VCF_QC.out.vcftools_tstv_counts.collect{ meta, counts -> counts})
+ ch_reports = ch_reports.mix(VCF_QC.out.vcftools_tstv_qual.collect{ meta, qual -> qual })
+ ch_reports = ch_reports.mix(VCF_QC.out.vcftools_filter_summary.collect{meta, summary -> summary})
VARIANTCALLING_CSV(vcf_to_annotate)
@@ -951,7 +1004,8 @@ workflow SAREK {
vep_fasta = (params.vep_include_fasta) ? fasta : []
- ANNOTATE(vcf_to_annotate,
+ ANNOTATE(
+ vcf_to_annotate,
vep_fasta,
params.tools,
snpeff_db,
@@ -981,7 +1035,7 @@ workflow SAREK {
ch_multiqc_files = Channel.empty().mix(ch_version_yaml,
ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'),
- ch_reports.collect())
+ ch_reports.collect().ifEmpty([]))
ch_multiqc_configs = Channel.from(ch_multiqc_config).mix(ch_multiqc_custom_config).ifEmpty([])