diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0b753a453d..f4ac5b3605 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -55,6 +55,7 @@ jobs: - "strelkabp" - "targeted" - "tiddit" + - "trimming" - "tumor_normal_pair" - "variantcalling_channel" diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 9c69efef1f..6b6fdb1c7f 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -18,10 +18,10 @@ export_plots: true run_modules: - custom_content - fastqc - - cutadapt + - fastp - picard - samtools - - qualimap + - mosdepth - gatk - bcftools - vcftools @@ -33,19 +33,15 @@ module_order: name: "FastQC (raw)" path_filters_exclude: - "*_val_*.zip" - - cutadapt: - name: "Cutadapt" - - fastqc: - name: "FastQC (trimmed)" - path_filters: - - "*_val_*.zip" + - fastp: + name: "FastP (Read preprocessing)" - picard: name: "GATK4 MarkDuplicates" info: " metrics generated either by GATK4 MarkDuplicates or EstimateLibraryComplexity (with --use_gatk_spark)." - samtools: name: "Samtools Flagstat" - - qualimap: - name: "Qualimap" + - mosdepth: + name: "Mosdepth" - gatk: name: "GATK4 BQSR" - bcftools: diff --git a/conf/modules.config b/conf/modules.config index c767a03ee3..939f271688 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -193,23 +193,29 @@ process { } // TRIMMING - withName: 'TRIMGALORE' { - ext.args = '--fastqc' - publishDir = [ - [ - path: { "${params.outdir}/reports/trimgalore/${meta.id}/fastqc" }, - mode: params.publish_dir_mode, - pattern: "*{html,zip}" - ], + + withName: 'FASTP' { + ext.args = [ "", + !params.trim_fastq ? "--disable_adapter_trimming" : "", // Only pass the disable flag when --trim_fastq is not set; a ternary keeps the boolean itself out of the joined argument string. + params.clip_r1 > 0 ? "--trim_front1 ${params.clip_r1}" : "", // Remove bp from the 5' end of read 1. + params.three_prime_clip_r1 > 0 ? "--trim_tail1 ${params.three_prime_clip_r1}" : "", // Remove bp from the 3' end of read 1 AFTER adapter/quality trimming has been performed. + params.clip_r2 > 0 ? 
"--trim_front2 ${params.clip_r2}" : "", // Remove bp from the 5' end of read 2. + params.three_prime_clip_r2 > 0 ? "--trim_tail2 ${params.three_prime_clip_r2}" : "", // Remove bp from the 3' end of read 2 AFTER adapter/quality trimming has been performed. + params.trim_nextseq ? "--trim_poly_g" : "", // Force poly-G tail trimming when --trim_nextseq is set. + params.split_fastq > 0 ? "--split_by_lines ${params.split_fastq * 4}" : "", // fastp splits by line count; a FASTQ record is 4 lines. + + ].join(" ").trim() + publishDir = [ [ - path: { "${params.outdir}/trimgalore/${meta.id}" }, + path: { "${params.outdir}/reports/fastp/${meta.sample}" }, mode: params.publish_dir_mode, - pattern: "*fq.gz" + pattern: "*.{html,json,log}" ], [ - path: { "${params.outdir}/reports/trimgalore/${meta.id}" }, + enabled: params.save_trimmed || params.save_split_fastqs, + path: { "${params.outdir}/preprocessing/${meta.sample}/fastp" }, mode: params.publish_dir_mode, - pattern: "*txt" + pattern: "*.fastp.fastq.gz" ] ] } @@ -239,19 +245,6 @@ process { ext.prefix = {"${meta.id}_unsorted_tagged"} } -// SPLIT FASTQ - - withName: 'SEQKIT_SPLIT2' { - ext.args = { "--by-size ${params.split_fastq}" } - ext.when = { params.split_fastq > 1 } - publishDir = [ - enabled: params.save_split_fastqs, - mode: params.publish_dir_mode, - path: { "${params.outdir}/preprocessing/${meta.sample}/seqkit" }, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - // MAPPING if (params.step == 'mapping') { withName: ".*GATK4_MAPPING:BWAMEM1_MEM" { @@ -272,12 +265,12 @@ process { // However if it's skipped, reads need to be coordinate-sorted // Only name sort if Spark for Markduplicates + duplicate marking is not skipped ext.args2 = { params.use_gatk_spark && params.use_gatk_spark.contains('markduplicates') && (!params.skip_tools || (params.skip_tools && !params.skip_tools.contains('markduplicates'))) ? '-n' : '' } - ext.prefix = { params.split_fastq > 1 ? 
"${meta.id}".concat('.').concat(reads.get(0).name.findAll(/part_([0-9]+)?/).last()) : "" } + ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(reads.get(0).name.tokenize('.')[0]) : "" } publishDir = [ mode: params.publish_dir_mode, - path: { "${params.outdir}/preprocessing/${meta.id}/" }, + path: { "${params.outdir}/preprocessing/" }, pattern: "*bam", - saveAs: { (params.save_bam_mapped || (params.skip_tools && params.skip_tools.contains('markduplicates'))) && (meta.size * meta.numLanes == 1) ? "mapped/${it}" : null } + saveAs: { (params.save_bam_mapped || (params.skip_tools && params.skip_tools.contains('markduplicates'))) && (meta.size * meta.numLanes == 1) ? "${meta.id}/mapped/${it}" : null } ] } @@ -479,28 +472,18 @@ process { ] } - withName: 'DEEPTOOLS_BAMCOVERAGE' { - ext.when = { !(params.skip_tools && params.skip_tools.contains('deeptools')) } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reports/deeptools/${meta.id}" }, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: 'QUALIMAP_BAMQCCRAM' { - ext.args = '--paint-chromosome-limits --genome-gc-distr HUMAN -skip-duplicated --skip-dup-mode 0 -outformat HTML' + withName: 'MOSDEPTH' { + ext.args = { !params.wes ? "-n --fast-mode --by 500" : ""} ext.prefix = { "${meta.id}.mapped" } - ext.when = { !(params.skip_tools && params.skip_tools.contains('bamqc')) } - publishDir = [ + publishDir = [ mode: params.publish_dir_mode, - path: { "${params.outdir}/reports/qualimap/${meta.id}" }, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + path: { "${params.outdir}/reports/mosdepth/${meta.id}" }, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, ] } if (params.step == 'mapping' || params.step == 'markduplicates'|| params.step == 'prepare_recalibration'|| params.step == 'recalibrate') { - withName: 'NFCORE_SAREK:SAREK:CRAM_QC:QUALIMAP_BAMQCCRAM' { + withName: 'NFCORE_SAREK:SAREK:CRAM_QC:MOSDEPTH' { ext.prefix = { "${meta.id}.recal" } } @@ -1146,6 +1129,7 @@ process{ // VEP if (params.tools && (params.tools.contains('vep') || params.tools.contains('merge'))) { withName: 'ENSEMBLVEP' { + // If just VEP: _VEP.ann.vcf ext.prefix = { "${vcf.baseName.minus(".vcf")}_VEP" } ext.args = [ '--everything --filter_common --per_gene --total_length --offline --format vcf', @@ -1171,16 +1155,10 @@ process{ } } - if (params.tools && params.tools.contains('vep')) { - withName: 'NFCORE_SAREK:SAREK:ANNOTATE:ANNOTATION_ENSEMBLVEP:ENSEMBLVEP' { - ext.prefix = {"${meta.id}_VEP"} - } - } - // SNPEFF THEN VEP if (params.tools && params.tools.contains('merge')) { withName: ".*:ANNOTATION_MERGE:ENSEMBLVEP" { - // Output file will have format *_snpEff_VEP.ann.vcf, *_snpEff_VEP.ann.json or *_snpEff_VEP.ann.tab + // If merge: Output file will have format *_snpEff_VEP.ann.vcf, *_snpEff_VEP.ann.json or *_snpEff_VEP.ann.tab ext.prefix = { "${vcf.baseName.minus(".ann.vcf")}_VEP" } } } diff --git a/docs/images/sarek_subway.png b/docs/images/sarek_subway.png index edd0a166a2..af1d6f11fc 100644 Binary files a/docs/images/sarek_subway.png and b/docs/images/sarek_subway.png differ diff --git a/docs/images/sarek_subway.svg b/docs/images/sarek_subway.svg index 802e7fcbdf..372dea2590 100644 --- a/docs/images/sarek_subway.svg +++ b/docs/images/sarek_subway.svg @@ -7,7 +7,7 @@ viewBox="0 0 297 210" version="1.1" id="svg5" - inkscape:version="1.2 (dc2aeda, 2022-05-15)" + inkscape:version="1.1 (c4e8f9e, 2021-05-24)" sodipodi:docname="sarek_subway.svg" inkscape:export-filename="sarek_subway.png" inkscape:export-xdpi="90" @@ -27,13 +27,13 @@ inkscape:document-units="mm" showgrid="true" inkscape:zoom="1.4127647" - 
inkscape:cx="577.23696" - inkscape:cy="353.56206" + inkscape:cx="552.10893" + inkscape:cy="134.13416" inkscape:window-width="1600" - inkscape:window-height="947" - inkscape:window-x="1024" - inkscape:window-y="25" - inkscape:window-maximized="1" + inkscape:window-height="587" + inkscape:window-x="60" + inkscape:window-y="41" + inkscape:window-maximized="0" inkscape:current-layer="layer4" width="211mm" fit-margin-top="0" @@ -846,25 +846,25 @@ - trimgalore UMI mapping - fastqc fastqc - - - bam/cram - - seqkit split2 + id="tspan6801">fastp snpeff @@ -1460,21 +1389,21 @@ inkscape:export-xdpi="90" inkscape:export-filename="./polygon4618.png" id="text690" - y="-12.285065" - x="530.57935" + y="-36.968044" + x="555.26233" style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:12.5px;line-height:1.25;font-family:'Maven Pro';-inkscape-font-specification:'Maven Pro, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;display:inline;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.937508" xml:space="preserve" transform="rotate(-45)">deeptools, samtools, qualimap + x="555.26233" + y="-36.968044" + style="stroke-width:0.937508">mosdepth, samtools samtools, qualimap + x="727.77972" + y="71.877167" + style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:12.5px;font-family:'Maven Pro';-inkscape-font-specification:'Maven Pro';text-align:center;text-anchor:middle;stroke-width:0.937508">mosdepth, samtools multiqc - - - vcf - - convert - - - + + bam/bam/cram - - - + style="font-weight:bold;-inkscape-font-specification:'Maven Pro Bold';stroke-width:0.435692px" + id="tspan9194">cram + d="m 80.58806,-616.72784 h 20.71177 v 9.14861 c 0,0.49398 0.40075,0.89406 0.89406,0.89406 h 7.89809 v 4.47922 H 80.58806 Z m 29.504,41.5363 h -29.504 v -8.61368 h 29.50392 v 8.61368 z" + id="path3209-1" 
/> + + - - - - + + + + vcf + + + + + markduplicates + + + bam/cram + + + + - - @@ -1951,19 +1906,19 @@ inkscape:export-xdpi="90" inkscape:export-filename="./polygon4618.png" id="text6815" - y="33.058819" - x="688.38373" + y="4.3283162" + x="658.77838" style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:12.5px;line-height:1.25;font-family:'Maven Pro';-inkscape-font-specification:'Maven Pro';display:inline;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.937508" xml:space="preserve">prepare recalibration @@ -1973,26 +1928,26 @@ inkscape:export-xdpi="90" inkscape:export-filename="./polygon4618.png" id="text4655" - y="63.788181" - x="752.2832" + y="45.226635" + x="733.72192" style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:12.5px;line-height:1.25;font-family:'Maven Pro';-inkscape-font-specification:'Maven Pro';display:inline;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.937508" xml:space="preserve">applybqsr @@ -2006,21 +1961,21 @@ @@ -2356,7 +2311,7 @@ cram @@ -2562,20 +2517,6 @@ id="circle1664-6-5" style="display:inline;fill:#ffffff;fill-rule:evenodd;stroke:#000000;stroke-width:3.54334;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" transform="scale(1,-1)" /> - - @@ -2643,7 +2584,7 @@ ry="1.4062777" rx="1.40625" y="-524.63593" - x="422.8508" + x="387.2258" height="19.60778" width="19.60778" id="rect5979" @@ -2653,7 +2594,7 @@ id="rect5981" width="19.60778" height="19.60778" - x="422.8508" + x="387.2258" y="-549.49432" rx="1.40625" ry="1.4062777" /> @@ -2903,24 +2844,6 @@ id="path5684-4" inkscape:connector-curvature="0" /> - - - - + + + + + + + + + + + + + ubam versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - deeptools: \$(bamCoverage --version | sed -e "s/bamCoverage //g") - END_VERSIONS - """ - - } - else { - """ - bamCoverage \\ - --bam $input_out \\ - $args \\ - 
--numberOfProcessors ${task.cpus} \\ - --outFileName ${prefix} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - deeptools: \$(bamCoverage --version | sed -e "s/bamCoverage //g") - END_VERSIONS - """ - } - -} diff --git a/modules/nf-core/modules/deeptools/bamcoverage/meta.yml b/modules/nf-core/modules/deeptools/bamcoverage/meta.yml deleted file mode 100644 index c656691064..0000000000 --- a/modules/nf-core/modules/deeptools/bamcoverage/meta.yml +++ /dev/null @@ -1,58 +0,0 @@ -name: deeptools_bamcoverage -description: This tool takes an alignment of reads or fragments as input (BAM file) and generates a coverage track (bigWig or bedGraph) as output. -keywords: - - sort -tools: - - deeptools: - description: A set of user-friendly tools for normalization and visualzation of deep-sequencing data - homepage: https://deeptools.readthedocs.io/en/develop/content/tools/bamCoverage.html - documentation: https://deeptools.readthedocs.io/en/develop/content/tools/bamCoverage.html - tool_dev_url: https://github.com/deeptools/deepTools/ - doi: "https://doi.org/10.1093/nar/gkw257" - licence: ["GPL v3"] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - input: - type: file - description: BAM/CRAM file - pattern: "*.{bam,cram}" - - input_index: - type: file - description: BAM/CRAM index file - pattern: "*.{bai,crai}" - - fasta: - type: file - description: Reference file the CRAM file was created with (required with CRAM input) - pattern: "*.{fasta,fa}" - - fasta_fai: - type: file - description: Index of the reference file (optional, but recommended) - pattern: "*.{fai}" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - bigWig: - type: file - description: BigWig file - pattern: "*.bigWig" - - bedgraph: - type: file - description: Bedgraph file - pattern: "*.bedgraph" - -authors: - - "@FriederikeHanssen" - - "@SusiJo" diff --git a/modules/nf-core/modules/fastp/main.nf b/modules/nf-core/modules/fastp/main.nf new file mode 100644 index 0000000000..120392c561 --- /dev/null +++ b/modules/nf-core/modules/fastp/main.nf @@ -0,0 +1,75 @@ +process FASTP { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? 'bioconda::fastp=0.23.2' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/fastp:0.23.2--h79da9fb_0' : + 'quay.io/biocontainers/fastp:0.23.2--h79da9fb_0' }" + + input: + tuple val(meta), path(reads) + val save_trimmed_fail + val save_merged + + output: + tuple val(meta), path('*.fastp.fastq.gz') , optional:true, emit: reads + tuple val(meta), path('*.json') , emit: json + tuple val(meta), path('*.html') , emit: html + tuple val(meta), path('*.log') , emit: log + path "versions.yml" , emit: versions + tuple val(meta), path('*.fail.fastq.gz') , optional:true, emit: reads_fail + tuple val(meta), path('*.merged.fastq.gz'), optional:true, emit: reads_merged + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + // Added soft-links to original fastqs for consistent naming in MultiQC + def prefix = task.ext.prefix ?: "${meta.id}" + if (meta.single_end) { + def fail_fastq = save_trimmed_fail ? "--failed_out ${prefix}.fail.fastq.gz" : '' + """ + [ ! 
-f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz + fastp \\ + --in1 ${prefix}.fastq.gz \\ + --out1 ${prefix}.fastp.fastq.gz \\ + --thread $task.cpus \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $fail_fastq \\ + $args \\ + 2> ${prefix}.fastp.log + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } else { + def fail_fastq = save_trimmed_fail ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' + def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : '' + """ + [ ! -f ${prefix}_1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_1.fastq.gz + [ ! -f ${prefix}_2.fastq.gz ] && ln -sf ${reads[1]} ${prefix}_2.fastq.gz + fastp \\ + --in1 ${prefix}_1.fastq.gz \\ + --in2 ${prefix}_2.fastq.gz \\ + --out1 ${prefix}_1.fastp.fastq.gz \\ + --out2 ${prefix}_2.fastp.fastq.gz \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $fail_fastq \\ + $merge_fastq \\ + --thread $task.cpus \\ + --detect_adapter_for_pe \\ + $args \\ + 2> ${prefix}.fastp.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/modules/fastp/meta.yml b/modules/nf-core/modules/fastp/meta.yml new file mode 100644 index 0000000000..2bd2b1a91f --- /dev/null +++ b/modules/nf-core/modules/fastp/meta.yml @@ -0,0 +1,68 @@ +name: fastp +description: Perform adapter/quality trimming on sequencing reads +keywords: + - trimming + - quality control + - fastq +tools: + - fastp: + description: | + A tool designed to provide fast all-in-one preprocessing for FastQ files. This tool is developed in C++ with multithreading supported to afford high performance. 
+ documentation: https://github.com/OpenGene/fastp + doi: https://doi.org/10.1093/bioinformatics/bty560 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - save_trimmed_fail: + type: boolean + description: Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz` + - save_merged: + type: boolean + description: Specify true to save all merged reads to the a file ending in `*.merged.fastq.gz` + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: The trimmed/modified/unmerged fastq reads + pattern: "*fastp.fastq.gz" + - json: + type: file + description: Results in JSON format + pattern: "*.json" + - html: + type: file + description: Results in HTML format + pattern: "*.html" + - log: + type: file + description: fastq log file + pattern: "*.log" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - reads_fail: + type: file + description: Reads the failed the preprocessing + pattern: "*fail.fastq.gz" + - reads_merged: + type: file + description: Reads that were successfully merged + pattern: "*.{merged.fastq.gz}" +authors: + - "@drpatelh" + - "@kevinmenden" diff --git a/modules/nf-core/modules/mosdepth/main.nf b/modules/nf-core/modules/mosdepth/main.nf new file mode 100644 index 0000000000..d7e3c92904 --- /dev/null +++ b/modules/nf-core/modules/mosdepth/main.nf @@ -0,0 +1,81 @@ +process MOSDEPTH { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? 'bioconda::mosdepth=0.3.3' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mosdepth:0.3.3--hdfd78af_1' : + 'quay.io/biocontainers/mosdepth:0.3.3--hdfd78af_1'}" + + input: + tuple val(meta), path(bam), path(bai) + path bed + path fasta + + output: + tuple val(meta), path('*.global.dist.txt') , emit: global_txt + tuple val(meta), path('*.summary.txt') , emit: summary_txt + tuple val(meta), path('*.region.dist.txt') , optional:true, emit: regions_txt + tuple val(meta), path('*.per-base.d4') , optional:true, emit: per_base_d4 + tuple val(meta), path('*.per-base.bed.gz') , optional:true, emit: per_base_bed + tuple val(meta), path('*.per-base.bed.gz.csi') , optional:true, emit: per_base_csi + tuple val(meta), path('*.regions.bed.gz') , optional:true, emit: regions_bed + tuple val(meta), path('*.regions.bed.gz.csi') , optional:true, emit: regions_csi + tuple val(meta), path('*.quantized.bed.gz') , optional:true, emit: quantized_bed + tuple val(meta), path('*.quantized.bed.gz.csi') , optional:true, emit: quantized_csi + tuple val(meta), path('*.thresholds.bed.gz') , optional:true, emit: thresholds_bed + tuple val(meta), path('*.thresholds.bed.gz.csi'), optional:true, emit: thresholds_csi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--fasta ${fasta}" : "" + def interval = bed ? "--by ${bed}" : "" + if (bed && args.contains("--by")) { + exit 1, "'--by' can only be specified once when running mosdepth! 
Either remove input BED file definition or remove '--by' from 'ext.args' definition" + } + if (!bed && args.contains("--thresholds")) { + exit 1, "'--thresholds' can only be specified in conjunction with '--by'" + } + + """ + mosdepth \\ + --threads $task.cpus \\ + $interval \\ + $reference \\ + $args \\ + $prefix \\ + $bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mosdepth: \$(mosdepth --version 2>&1 | sed 's/^.*mosdepth //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.global.dist.txt + touch ${prefix}.region.dist.txt + touch ${prefix}.summary.txt + touch ${prefix}.per-base.d4 + touch ${prefix}.per-base.bed.gz + touch ${prefix}.per-base.bed.gz.csi + touch ${prefix}.regions.bed.gz + touch ${prefix}.regions.bed.gz.csi + touch ${prefix}.quantized.bed.gz + touch ${prefix}.quantized.bed.gz.csi + touch ${prefix}.thresholds.bed.gz + touch ${prefix}.thresholds.bed.gz.csi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mosdepth: \$(mosdepth --version 2>&1 | sed 's/^.*mosdepth //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/mosdepth/meta.yml b/modules/nf-core/modules/mosdepth/meta.yml new file mode 100644 index 0000000000..d1e3344719 --- /dev/null +++ b/modules/nf-core/modules/mosdepth/meta.yml @@ -0,0 +1,99 @@ +name: mosdepth +description: Calculates genome-wide sequencing coverage. +keywords: + - mosdepth + - bam + - cram + - coverage +tools: + - mosdepth: + description: | + Fast BAM/CRAM depth calculation for WGS, exome, or targeted sequencing. + documentation: https://github.com/brentp/mosdepth + doi: 10.1093/bioinformatics/btx699 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: Input BAM/CRAM file + pattern: "*.{bam,cram}" + - bai: + type: file + description: Index for BAM/CRAM file + pattern: "*.{bai,crai}" + - bed: + type: file + description: BED file with intersected intervals + pattern: "*.{bed}" + - fasta: + type: file + description: Reference genome FASTA file + pattern: "*.{fa,fasta}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - global_txt: + type: file + description: Text file with global cumulative coverage distribution + pattern: "*.{global.dist.txt}" + - regions_txt: + type: file + description: Text file with region cumulative coverage distribution + pattern: "*.{region.dist.txt}" + - summary_txt: + type: file + description: Text file with summary mean depths per chromosome and regions + pattern: "*.{summary.txt}" + - per_base_bed: + type: file + description: BED file with per-base coverage + pattern: "*.{per-base.bed.gz}" + - per_base_csi: + type: file + description: Index file for BED file with per-base coverage + pattern: "*.{per-base.bed.gz.csi}" + - per_base_d4: + type: file + description: D4 file with per-base coverage + pattern: "*.{per-base.d4}" + - regions_bed: + type: file + description: BED file with per-region coverage + pattern: "*.{regions.bed.gz}" + - regions_csi: + type: file + description: Index file for BED file with per-region coverage + pattern: "*.{regions.bed.gz.csi}" + - quantized_bed: + type: file + description: BED file with binned coverage + pattern: "*.{quantized.bed.gz}" + - quantized_csi: + type: file + description: Index file for BED file with binned coverage + pattern: "*.{quantized.bed.gz.csi}" + - thresholds_bed: + type: file + description: BED file with the number of bases in each region that are covered at or above each threshold + pattern: "*.{thresholds.bed.gz}" + - thresholds_csi: + type: file + description: Index file for BED file 
with threshold coverage + pattern: "*.{thresholds.bed.gz.csi}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@ramprasadn" + - "@matthdsm" diff --git a/modules/nf-core/modules/qualimap/bamqc/main.nf b/modules/nf-core/modules/qualimap/bamqc/main.nf deleted file mode 100644 index 3bfcb4c1f5..0000000000 --- a/modules/nf-core/modules/qualimap/bamqc/main.nf +++ /dev/null @@ -1,123 +0,0 @@ -process QUALIMAP_BAMQC { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::qualimap=2.2.2d" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/qualimap:2.2.2d--1' : - 'quay.io/biocontainers/qualimap:2.2.2d--1' }" - - input: - tuple val(meta), path(bam) - path gff - - output: - tuple val(meta), path("${prefix}"), emit: results - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - - def collect_pairs = meta.single_end ? '' : '--collect-overlap-pairs' - def memory = task.memory.toGiga() + "G" - def regions = gff ? "--gff $gff" : '' - - def strandedness = 'non-strand-specific' - if (meta.strandedness == 'forward') { - strandedness = 'strand-specific-forward' - } else if (meta.strandedness == 'reverse') { - strandedness = 'strand-specific-reverse' - } - """ - unset DISPLAY - mkdir tmp - export _JAVA_OPTIONS=-Djava.io.tmpdir=./tmp - qualimap \\ - --java-mem-size=$memory \\ - bamqc \\ - $args \\ - -bam $bam \\ - $regions \\ - -p $strandedness \\ - $collect_pairs \\ - -outdir $prefix \\ - -nt $task.cpus - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - qualimap: \$(echo \$(qualimap 2>&1) | sed 's/^.*QualiMap v.//; s/Built.*\$//') - END_VERSIONS - """ - - stub: - prefix = task.ext.suffix ? 
"${meta.id}${task.ext.suffix}" : "${meta.id}" - """ - mkdir -p $prefix/css - mkdir $prefix/images_qualimapReport - mkdir $prefix/raw_data_qualimapReport - cd $prefix/css - touch agogo.css - touch basic.css - touch bgtop.png - touch comment-close.png - touch doctools.js - touch down-pressed.png - touch jquery.js - touch plus.png - touch qualimap_logo_small.png - touch searchtools.js - touch up.png - touch websupport.js - touch ajax-loader.gif - touch bgfooter.png - touch comment-bright.png - touch comment.png - touch down.png - touch file.png - touch minus.png - touch pygments.css - touch report.css - touch underscore.js - touch up-pressed.png - cd ../images_qualimapReport/ - touch genome_coverage_0to50_histogram.png - touch genome_coverage_quotes.png - touch genome_insert_size_across_reference.png - touch genome_mapping_quality_histogram.png - touch genome_uniq_read_starts_histogram.png - touch genome_coverage_across_reference.png - touch genome_gc_content_per_window.png - touch genome_insert_size_histogram.png - touch genome_reads_clipping_profile.png - touch genome_coverage_histogram.png - touch genome_homopolymer_indels.png - touch genome_mapping_quality_across_reference.png - touch genome_reads_content_per_read_position.png - cd ../raw_data_qualimapReport - touch coverage_across_reference.txt - touch genome_fraction_coverage.txt - touch insert_size_histogram.txt - touch mapped_reads_nucleotide_content.txt - touch coverage_histogram.txt - touch homopolymer_indels.txt - touch mapped_reads_clipping_profile.txt - touch mapping_quality_across_reference.txt - touch duplication_rate_histogram.txt - touch insert_size_across_reference.txt - touch mapped_reads_gc-content_distribution.txt - touch mapping_quality_histogram.txt - cd ../ - touch genome_results.txt - touch qualimapReport.html - cd ../ - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - qualimap: \$(echo \$(qualimap 2>&1) | sed 's/^.*QualiMap v.//; s/Built.*\$//') - END_VERSIONS - """ -} diff --git 
a/modules/nf-core/modules/qualimap/bamqc/meta.yml b/modules/nf-core/modules/qualimap/bamqc/meta.yml deleted file mode 100644 index 303532eb7c..0000000000 --- a/modules/nf-core/modules/qualimap/bamqc/meta.yml +++ /dev/null @@ -1,47 +0,0 @@ -name: qualimap_bamqc -description: Evaluate alignment data -keywords: - - quality control - - qc - - bam -tools: - - qualimap: - description: | - Qualimap 2 is a platform-independent application written in - Java and R that provides both a Graphical User Interface and - a command-line interface to facilitate the quality control of - alignment sequencing data and its derivatives like feature counts. - homepage: http://qualimap.bioinfo.cipf.es/ - documentation: http://qualimap.conesalab.org/doc_html/index.html - doi: 10.1093/bioinformatics/bts503 - licence: ["GPL-2.0-only"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM file - pattern: "*.{bam}" - - gff: - type: file - description: Feature file with regions of interest - pattern: "*.{gff,gtf,bed}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - results: - type: dir - description: Qualimap results dir - pattern: "*/*" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@phue" diff --git a/modules/nf-core/modules/qualimap/bamqccram/main.nf b/modules/nf-core/modules/qualimap/bamqccram/main.nf deleted file mode 100644 index e136b8e24f..0000000000 --- a/modules/nf-core/modules/qualimap/bamqccram/main.nf +++ /dev/null @@ -1,60 +0,0 @@ -process QUALIMAP_BAMQCCRAM { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::qualimap=2.2.2d bioconda::samtools=1.15.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-d3934ca6bb4e61334891ffa2e9a4c87a530e3188:61f6d4658ac88635fc37623af50bba77561988ab-0' : - 'quay.io/biocontainers/mulled-v2-d3934ca6bb4e61334891ffa2e9a4c87a530e3188:61f6d4658ac88635fc37623af50bba77561988ab-0' }" - - input: - tuple val(meta), path(cram), path(crai) - path gff - path fasta - path fasta_fai - - output: - tuple val(meta), path("${prefix}"), emit: results - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - - def collect_pairs = meta.single_end ? '' : '--collect-overlap-pairs' - def memory = task.memory.toGiga() + "G" - def regions = gff ? "--gff $gff" : '' - - def strandedness = 'non-strand-specific' - if (meta.strandedness == 'forward') { - strandedness = 'strand-specific-forward' - } else if (meta.strandedness == 'reverse') { - strandedness = 'strand-specific-reverse' - } - """ - unset DISPLAY - mkdir tmp - export _JAVA_OPTIONS=-Djava.io.tmpdir=./tmp - - samtools view -hb -T ${fasta} ${cram} | - qualimap \\ - --java-mem-size=$memory \\ - bamqc \\ - $args \\ - -bam /dev/stdin \\ - $regions \\ - -p $strandedness \\ - $collect_pairs \\ - -outdir $prefix \\ - -nt $task.cpus - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - qualimap: \$(echo \$(qualimap 2>&1) | sed 's/^.*QualiMap v.//; s/Built.*\$//') - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/qualimap/bamqccram/meta.yml b/modules/nf-core/modules/qualimap/bamqccram/meta.yml deleted file mode 100644 index d72f203df6..0000000000 --- a/modules/nf-core/modules/qualimap/bamqccram/meta.yml +++ /dev/null @@ -1,51 +0,0 @@ -name: qualimap_bamqccram -description: Evaluate alignment data -keywords: - - quality control - - qc - - bam -tools: - - qualimap: - description: | - Qualimap 2 is a platform-independent application 
written in - Java and R that provides both a Graphical User Interface and - a command-line interface to facilitate the quality control of - alignment sequencing data and its derivatives like feature counts. - homepage: http://qualimap.bioinfo.cipf.es/ - documentation: http://qualimap.conesalab.org/doc_html/index.html - doi: 10.1093/bioinformatics/bts503 - licence: ["GPL-2.0-only"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bacramm: - type: file - description: BAM file - pattern: "*.{bam}" - - gff: - type: file - description: Feature file with regions of interest - pattern: "*.{gff,gtf,bed}" - - fasta: - type: file - description: Reference file of cram file - pattern: "*.{fasta,fa,fna}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - results: - type: dir - description: Qualimap results dir - pattern: "*/*" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@FriederikeHanssen" diff --git a/modules/nf-core/modules/seqkit/split2/main.nf b/modules/nf-core/modules/seqkit/split2/main.nf deleted file mode 100644 index ff91d86bd9..0000000000 --- a/modules/nf-core/modules/seqkit/split2/main.nf +++ /dev/null @@ -1,53 +0,0 @@ -process SEQKIT_SPLIT2 { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? 'bioconda::seqkit=2.1.0' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/seqkit:2.1.0--h9ee0642_0' : - 'quay.io/biocontainers/seqkit:2.1.0--h9ee0642_0' }" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("**/*.gz"), emit: reads - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - if(meta.single_end){ - """ - seqkit \\ - split2 \\ - $args \\ - --threads $task.cpus \\ - $reads \\ - --out-dir ${prefix} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - seqkit: \$(echo \$(seqkit 2>&1) | sed 's/^.*Version: //; s/ .*\$//') - END_VERSIONS - """ - } else { - """ - seqkit \\ - split2 \\ - $args \\ - --threads $task.cpus \\ - --read1 ${reads[0]} \\ - --read2 ${reads[1]} \\ - --out-dir ${prefix} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - seqkit: \$(echo \$(seqkit 2>&1) | sed 's/^.*Version: //; s/ .*\$//') - END_VERSIONS - """ - } -} diff --git a/modules/nf-core/modules/seqkit/split2/meta.yml b/modules/nf-core/modules/seqkit/split2/meta.yml deleted file mode 100644 index 91c74b0c3e..0000000000 --- a/modules/nf-core/modules/seqkit/split2/meta.yml +++ /dev/null @@ -1,39 +0,0 @@ -name: seqkit_split2 -description: Split single or paired-end fastq.gz files -keywords: - - split - - fastq -tools: - - seqkit: - description: | - Cross-platform and ultrafast toolkit for FASTA/Q file manipulation, written by Wei Shen. - homepage: https://github.com/shenwei356/seqkit - documentation: https://bioinf.shenwei.me/seqkit/ - doi: 10.1371/journal.pone.0163962 - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: FastQ files - pattern: "*.{fq.gz/fastq.gz}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - reads: - type: file - description: Split fastq files - pattern: "*.{fq.gz/fastq.gz}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@FriederikeHanssen" diff --git a/modules/nf-core/modules/trimgalore/main.nf b/modules/nf-core/modules/trimgalore/main.nf deleted file mode 100644 index 3a3fca9044..0000000000 --- a/modules/nf-core/modules/trimgalore/main.nf +++ /dev/null @@ -1,86 +0,0 @@ -process TRIMGALORE { - tag "$meta.id" - label 'process_high' - - conda (params.enable_conda ? 'bioconda::trim-galore=0.6.7' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/trim-galore:0.6.7--hdfd78af_0' : - 'quay.io/biocontainers/trim-galore:0.6.7--hdfd78af_0' }" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("*{trimmed,val}*.fq.gz"), emit: reads - tuple val(meta), path("*report.txt") , emit: log - path "versions.yml" , emit: versions - - tuple val(meta), path("*unpaired*.fq.gz") , emit: unpaired, optional: true - tuple val(meta), path("*.html") , emit: html , optional: true - tuple val(meta), path("*.zip") , emit: zip , optional: true - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - // Calculate number of --cores for TrimGalore based on value of task.cpus - // See: https://github.com/FelixKrueger/TrimGalore/blob/master/Changelog.md#version-060-release-on-1-mar-2019 - // See: https://github.com/nf-core/atacseq/pull/65 - def cores = 1 - if (task.cpus) { - cores = (task.cpus as int) - 4 - if (meta.single_end) cores = (task.cpus as int) - 3 - if (cores < 1) cores = 1 - if (cores > 4) cores = 4 - } - - // Clipping presets have to be evaluated in the context of SE/PE - def c_r1 = params.clip_r1 > 0 ? "--clip_r1 ${params.clip_r1}" : '' - def c_r2 = params.clip_r2 > 0 ? 
"--clip_r2 ${params.clip_r2}" : '' - def tpc_r1 = params.three_prime_clip_r1 > 0 ? "--three_prime_clip_r1 ${params.three_prime_clip_r1}" : '' - def tpc_r2 = params.three_prime_clip_r2 > 0 ? "--three_prime_clip_r2 ${params.three_prime_clip_r2}" : '' - - // Added soft-links to original fastqs for consistent naming in MultiQC - def prefix = task.ext.prefix ?: "${meta.id}" - if (meta.single_end) { - """ - [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz - trim_galore \\ - $args \\ - --cores $cores \\ - --gzip \\ - $c_r1 \\ - $tpc_r1 \\ - ${prefix}.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - trimgalore: \$(echo \$(trim_galore --version 2>&1) | sed 's/^.*version //; s/Last.*\$//') - cutadapt: \$(cutadapt --version) - END_VERSIONS - """ - } else { - """ - [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz - [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz - trim_galore \\ - $args \\ - --cores $cores \\ - --paired \\ - --gzip \\ - $c_r1 \\ - $c_r2 \\ - $tpc_r1 \\ - $tpc_r2 \\ - ${prefix}_1.fastq.gz \\ - ${prefix}_2.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - trimgalore: \$(echo \$(trim_galore --version 2>&1) | sed 's/^.*version //; s/Last.*\$//') - cutadapt: \$(cutadapt --version) - END_VERSIONS - """ - } -} diff --git a/modules/nf-core/modules/trimgalore/meta.yml b/modules/nf-core/modules/trimgalore/meta.yml deleted file mode 100644 index 439f566dff..0000000000 --- a/modules/nf-core/modules/trimgalore/meta.yml +++ /dev/null @@ -1,64 +0,0 @@ -name: trimgalore -description: Trim FastQ files using Trim Galore! -keywords: - - trimming - - adapters - - sequencing adapters - - fastq -tools: - - trimgalore: - description: | - A wrapper tool around Cutadapt and FastQC to consistently apply quality - and adapter trimming to FastQ files, with some extra functionality for - MspI-digested RRBS-type (Reduced Representation Bisufite-Seq) libraries. 
- homepage: https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/ - documentation: https://github.com/FelixKrueger/TrimGalore/blob/master/Docs/Trim_Galore_User_Guide.md - licence: ["GPL-3.0-or-later"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input adapter trimmed FastQ files of size 1 and 2 for - single-end and paired-end data, respectively. - pattern: "*.{fq.gz}" - - unpaired: - type: file - description: | - FastQ files containing unpaired reads from read 1 or read 2 - pattern: "*unpaired*.fq.gz" - - html: - type: file - description: FastQC report (optional) - pattern: "*_{fastqc.html}" - - zip: - type: file - description: FastQC report archive (optional) - pattern: "*_{fastqc.zip}" - - log: - type: file - description: Trim Galore! 
trimming report - pattern: "*_{report.txt}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@ewels" - - "@FelixKrueger" diff --git a/nextflow_schema.json b/nextflow_schema.json index 7826b83cd1..8b89da2bab 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -84,7 +84,7 @@ "fa_icon": "fas fa-forward", "description": "Disable specified tools.", "help_text": "Multiple tools can be specified, separated by commas.\n\n> **NB** `--skip_tools baserecalibrator_report` is actually just not saving the reports.\n> **NB** `--skip_tools markduplicates_report` does not skip `MarkDuplicates` but prevent the collection of duplicate metrics that slows down performance.\n> **NB** tools can be specified with no concern for case.", - "pattern": "^((bamqc|baserecalibrator|baserecalibrator_report|bcftools|documentation|fastqc|markduplicates|markduplicates_report|multiqc|samtools|vcftools|versions|deeptools)*(,)*)*$" + "pattern": "^((baserecalibrator|baserecalibrator_report|bcftools|documentation|fastqc|markduplicates|markduplicates_report|mosdepth|multiqc|samtools|vcftools|versions)*(,)*)*$" }, "wes": { "type": "boolean", diff --git a/subworkflows/local/split_fastq.nf b/subworkflows/local/split_fastq.nf deleted file mode 100644 index f58d967407..0000000000 --- a/subworkflows/local/split_fastq.nf +++ /dev/null @@ -1,37 +0,0 @@ -// -// SPLIT_FASTQ -// -// For all modules here: -// A when clause condition is defined in the conf/modules.config to determine if the module should be run - -include { SEQKIT_SPLIT2 } from '../../modules/nf-core/modules/seqkit/split2/main' - -workflow SPLIT_FASTQ { - take: - reads_input // channel: [mandatory] meta, reads_input - - main: - - ch_versions = Channel.empty() - - // Only if we want to split fastq files - SEQKIT_SPLIT2(reads_input) - - // Remapping the channel - reads = SEQKIT_SPLIT2.out.reads.map{ key, reads -> - //TODO maybe this can be replaced by a regex 
to include part_001 etc. - //sorts list of split fq files by : - //[R1.part_001, R2.part_001, R1.part_002, R2.part_002,R1.part_003, R2.part_003,...] - //TODO: determine whether it is possible to have an uneven number of parts, so remainder: true woud need to be used, I guess this could be possible for unfiltered reads, reads that don't have pairs etc. - read_files = reads.sort{ a,b -> a.getName().tokenize('.')[ a.getName().tokenize('.').size() - 3] <=> b.getName().tokenize('.')[ b.getName().tokenize('.').size() - 3]}.collate(2) - [[patient: key.patient, sample:key.sample, gender:key.gender, status:key.status, id:key.id, numLanes:key.numLanes, read_group:key.read_group, data_type:key.data_type, size:read_files.size()], - read_files] - }.transpose() - - ch_versions = ch_versions.mix(SEQKIT_SPLIT2.out.versions) - - emit: - reads = reads - versions = ch_versions -} - diff --git a/subworkflows/nf-core/bam_to_cram.nf b/subworkflows/nf-core/bam_to_cram.nf index 70ab9deb6a..e1c0ffff56 100644 --- a/subworkflows/nf-core/bam_to_cram.nf +++ b/subworkflows/nf-core/bam_to_cram.nf @@ -4,10 +4,9 @@ // For all modules here: // A when clause condition is defined in the conf/modules.config to determine if the module should be run -include { DEEPTOOLS_BAMCOVERAGE } from '../../modules/nf-core/modules/deeptools/bamcoverage/main' -include { QUALIMAP_BAMQCCRAM } from '../../modules/nf-core/modules/qualimap/bamqccram/main' include { SAMTOOLS_CONVERT as SAMTOOLS_BAMTOCRAM } from '../../modules/nf-core/modules/samtools/convert/main' include { SAMTOOLS_STATS as SAMTOOLS_STATS_CRAM } from '../../modules/nf-core/modules/samtools/stats/main' +include { MOSDEPTH } from '../../modules/nf-core/modules/mosdepth/main' workflow BAM_TO_CRAM { take: @@ -15,7 +14,8 @@ workflow BAM_TO_CRAM { cram_indexed fasta // channel: [mandatory] fasta fasta_fai // channel: [mandatory] fai - intervals_combined_bed_gz_tbi // channel: [optional] intervals_bed.gz, intervals_bed.gz.tbi + intervals_bed_combined // 
channel: [optional] intervals_bed + main: ch_versions = Channel.empty() @@ -30,19 +30,17 @@ workflow BAM_TO_CRAM { cram_indexed = Channel.empty().mix(cram_indexed,SAMTOOLS_BAMTOCRAM.out.alignment_index) // Reports on cram - DEEPTOOLS_BAMCOVERAGE(cram_indexed, fasta, fasta_fai) - QUALIMAP_BAMQCCRAM(cram_indexed, intervals_combined_bed_gz_tbi, fasta, fasta_fai) SAMTOOLS_STATS_CRAM(cram_indexed, fasta) + MOSDEPTH(cram_indexed, intervals_bed_combined, fasta) // Gather all reports generated - qc_reports = qc_reports.mix(DEEPTOOLS_BAMCOVERAGE.out.bigwig) - qc_reports = qc_reports.mix(QUALIMAP_BAMQCCRAM.out.results) qc_reports = qc_reports.mix(SAMTOOLS_STATS_CRAM.out.stats) + qc_reports = qc_reports.mix(MOSDEPTH.out.global_txt, + MOSDEPTH.out.regions_txt) // Gather versions of all tools used - ch_versions = ch_versions.mix(DEEPTOOLS_BAMCOVERAGE.out.versions.first()) - ch_versions = ch_versions.mix(QUALIMAP_BAMQCCRAM.out.versions.first()) - ch_versions = ch_versions.mix(SAMTOOLS_BAMTOCRAM.out.versions.first()) + ch_versions = ch_versions.mix(MOSDEPTH.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_BAMTOCRAM.out.versions) ch_versions = ch_versions.mix(SAMTOOLS_STATS_CRAM.out.versions) emit: diff --git a/subworkflows/nf-core/cram_qc.nf b/subworkflows/nf-core/cram_qc.nf index 8acd2d65c1..eb74fab07c 100644 --- a/subworkflows/nf-core/cram_qc.nf +++ b/subworkflows/nf-core/cram_qc.nf @@ -5,14 +5,14 @@ // A when clause condition is defined in the conf/modules.config to determine if the module should be run include { SAMTOOLS_STATS } from '../../modules/nf-core/modules/samtools/stats/main' -include { QUALIMAP_BAMQCCRAM } from '../../modules/nf-core/modules/qualimap/bamqccram/main' +include { MOSDEPTH } from '../../modules/nf-core/modules/mosdepth/main' workflow CRAM_QC { take: cram // channel: [mandatory] meta, cram, crai fasta // channel: [mandatory] fasta fasta_fai // channel: [mandatory] fasta_fai - intervals_combined_bed_gz_tbi + intervals_bed_combined main: ch_versions 
= Channel.empty() @@ -20,14 +20,15 @@ workflow CRAM_QC { // Reports run on cram SAMTOOLS_STATS(cram, fasta) - QUALIMAP_BAMQCCRAM(cram, intervals_combined_bed_gz_tbi, fasta, fasta_fai) + MOSDEPTH(cram, intervals_bed_combined, fasta) // Gather all reports generated qc_reports = qc_reports.mix(SAMTOOLS_STATS.out.stats) - qc_reports = qc_reports.mix(QUALIMAP_BAMQCCRAM.out.results) + qc_reports = qc_reports.mix(MOSDEPTH.out.global_txt, + MOSDEPTH.out.regions_txt) // Gather versions of all tools used - ch_versions = ch_versions.mix(QUALIMAP_BAMQCCRAM.out.versions.first()) + ch_versions = ch_versions.mix(MOSDEPTH.out.versions) ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first()) emit: diff --git a/subworkflows/nf-core/gatk4/markduplicates/main.nf b/subworkflows/nf-core/gatk4/markduplicates/main.nf index f211a8b54c..fdc7d248da 100644 --- a/subworkflows/nf-core/gatk4/markduplicates/main.nf +++ b/subworkflows/nf-core/gatk4/markduplicates/main.nf @@ -12,7 +12,7 @@ workflow MARKDUPLICATES { bam // channel: [mandatory] meta, bam fasta // channel: [mandatory] fasta fasta_fai // channel: [mandatory] fasta_fai - intervals_combined_bed_gz_tbi // channel: [optional] intervals_bed.gz, intervals_bed.gz.tbi + intervals_bed_combined // channel: [optional] intervals_bed main: ch_versions = Channel.empty() @@ -22,7 +22,7 @@ workflow MARKDUPLICATES { GATK4_MARKDUPLICATES(bam) // Convert output to cram - BAM_TO_CRAM(GATK4_MARKDUPLICATES.out.bam.join(GATK4_MARKDUPLICATES.out.bai), Channel.empty(), fasta, fasta_fai, intervals_combined_bed_gz_tbi) + BAM_TO_CRAM(GATK4_MARKDUPLICATES.out.bam.join(GATK4_MARKDUPLICATES.out.bai), Channel.empty(), fasta, fasta_fai, intervals_bed_combined) // Gather all reports generated qc_reports = qc_reports.mix(GATK4_MARKDUPLICATES.out.metrics) diff --git a/subworkflows/nf-core/gatk4/markduplicates_spark/main.nf b/subworkflows/nf-core/gatk4/markduplicates_spark/main.nf index 6e07f2c823..bffb2500d5 100644 --- 
a/subworkflows/nf-core/gatk4/markduplicates_spark/main.nf +++ b/subworkflows/nf-core/gatk4/markduplicates_spark/main.nf @@ -16,7 +16,7 @@ workflow MARKDUPLICATES_SPARK { dict // channel: [mandatory] dict fasta // channel: [mandatory] fasta fasta_fai // channel: [mandatory] fasta_fai - intervals_combined_bed_gz_tbi // channel: [optional] intervals_bed.gz, intervals_bed.gz.tbi + intervals_bed_combined // channel: [optional] intervals_bed main: ch_versions = Channel.empty() @@ -33,7 +33,7 @@ workflow MARKDUPLICATES_SPARK { SAMTOOLS_CRAMTOBAM(cram_markduplicates, fasta, fasta_fai) // Convert Markupduplicates spark bam output to cram when running bamqc and/or deeptools - BAM_TO_CRAM(Channel.empty(), cram_markduplicates, fasta, fasta_fai, intervals_combined_bed_gz_tbi) + BAM_TO_CRAM(Channel.empty(), cram_markduplicates, fasta, fasta_fai, intervals_bed_combined) // When running Marduplicates spark, and saving reports GATK4_ESTIMATELIBRARYCOMPLEXITY(bam, fasta, fasta_fai, dict) diff --git a/tests/test_aligner.yml b/tests/test_aligner.yml index f84125d652..07db7a9767 100644 --- a/tests/test_aligner.yml +++ b/tests/test_aligner.yml @@ -20,8 +20,14 @@ - path: results/reference/bwamem2/genome.fasta.bwt.2bit.64 - path: results/reference/bwamem2/genome.fasta.pac - path: results/reports/fastqc/test-test_L1 - - path: results/reports/qualimap/test/test.mapped - - path: results/reports/qualimap/test/test.recal + - path: results/reports/mosdepth/test/test.mapped.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.mapped.mosdepth.summary.txt + - path: results/reports/mosdepth/test/test.mapped.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.mapped.regions.bed.gz + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz 
- path: results/reports/samtools_stats/test/test.md.cram.stats - path: results/reports/samtools_stats/test/test.recal.cram.stats - name: Run dragmap @@ -49,7 +55,13 @@ - path: results/reference/dragmap/repeat_mask.bin - path: results/reference/dragmap/str_table.bin - path: results/reports/fastqc/test-test_L1 - - path: results/reports/qualimap/test/test.mapped - - path: results/reports/qualimap/test/test.recal + - path: results/reports/mosdepth/test/test.mapped.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.mapped.mosdepth.summary.txt + - path: results/reports/mosdepth/test/test.mapped.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.mapped.regions.bed.gz + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - path: results/reports/samtools_stats/test/test.md.cram.stats - path: results/reports/samtools_stats/test/test.recal.cram.stats diff --git a/tests/test_default.yml b/tests/test_default.yml index 92bcae62eb..03eda15635 100644 --- a/tests/test_default.yml +++ b/tests/test_default.yml @@ -15,11 +15,16 @@ - path: results/csv/recalibrated.csv - path: results/reports/fastqc/test-test_L1 - path: results/reports/markduplicates/test/test.md.metrics - - path: results/reports/qualimap/test/test.mapped - - path: results/reports/qualimap/test/test.recal + - path: results/reports/mosdepth/test/test.mapped.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.mapped.mosdepth.summary.txt + - path: results/reports/mosdepth/test/test.mapped.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.mapped.regions.bed.gz + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + - path: 
results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - path: results/reports/samtools_stats/test/test.md.cram.stats - path: results/reports/samtools_stats/test/test.recal.cram.stats - - path: results/reports/deeptools/test/test.bigWig - name: Run default pipeline without intervals command: nextflow run main.nf -profile test,no_intervals,docker @@ -39,11 +44,16 @@ - path: results/csv/recalibrated.csv - path: results/reports/fastqc/test-test_L1 - path: results/reports/markduplicates/test/test.md.metrics - - path: results/reports/qualimap/test/test.mapped - - path: results/reports/qualimap/test/test.recal + - path: results/reports/mosdepth/test/test.mapped.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.mapped.mosdepth.summary.txt + - path: results/reports/mosdepth/test/test.mapped.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.mapped.regions.bed.gz + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - path: results/reports/samtools_stats/test/test.md.cram.stats - path: results/reports/samtools_stats/test/test.recal.cram.stats - - path: results/reports/deeptools/test/test.bigWig - name: Run default pipeline with VC command: nextflow run main.nf -profile test,docker --tools strelka @@ -62,11 +72,16 @@ - path: results/csv/recalibrated.csv - path: results/reports/fastqc/test-test_L1 - path: results/reports/markduplicates/test/test.md.metrics - - path: results/reports/qualimap/test/test.mapped - - path: results/reports/qualimap/test/test.recal + - path: results/reports/mosdepth/test/test.mapped.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.mapped.mosdepth.summary.txt + - path: 
results/reports/mosdepth/test/test.mapped.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.mapped.regions.bed.gz + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - path: results/reports/samtools_stats/test/test.md.cram.stats - path: results/reports/samtools_stats/test/test.recal.cram.stats - - path: results/reports/deeptools/test/test.bigWig - path: results/variant_calling/test/strelka/test.genome.vcf.gz - path: results/variant_calling/test/strelka/test.genome.vcf.gz.tbi - path: results/variant_calling/test/strelka/test.variants.vcf.gz diff --git a/tests/test_gatk_spark.yml b/tests/test_gatk_spark.yml index 609d7fbed7..588335e66e 100644 --- a/tests/test_gatk_spark.yml +++ b/tests/test_gatk_spark.yml @@ -16,8 +16,13 @@ - path: results/csv/recalibrated.csv - path: results/reports/fastqc/test-test_L1 - path: results/reports/markduplicates/test/test.md.metrics - - path: results/reports/qualimap/test/test.mapped - - path: results/reports/qualimap/test/test.recal + - path: results/reports/mosdepth/test/test.mapped.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.mapped.mosdepth.summary.txt + - path: results/reports/mosdepth/test/test.mapped.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.mapped.regions.bed.gz + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - path: results/reports/samtools_stats/test/test.md.cram.stats - path: results/reports/samtools_stats/test/test.recal.cram.stats - - path: results/reports/deeptools/test/test.bigWig 
diff --git a/tests/test_markduplicates.yml b/tests/test_markduplicates.yml index 4b69f5a332..7c9c459c85 100644 --- a/tests/test_markduplicates.yml +++ b/tests/test_markduplicates.yml @@ -14,8 +14,14 @@ - path: results/csv/markduplicates.csv - path: results/csv/markduplicates_no_table.csv - path: results/csv/recalibrated.csv - - path: results/reports/qualimap/test/test.mapped - - path: results/reports/qualimap/test/test.recal + - path: results/reports/mosdepth/test/test.mapped.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.mapped.mosdepth.summary.txt + - path: results/reports/mosdepth/test/test.mapped.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.mapped.regions.bed.gz + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - path: results/reports/samtools_stats/test/test.md.cram.stats - path: results/reports/samtools_stats/test/test.recal.cram.stats - path: results/variant_calling/test/strelka/test.genome.vcf.gz @@ -39,8 +45,14 @@ - path: results/csv/markduplicates.csv - path: results/csv/markduplicates_no_table.csv - path: results/csv/recalibrated.csv - - path: results/reports/qualimap/test/test.mapped - - path: results/reports/qualimap/test/test.recal + - path: results/reports/mosdepth/test/test.mapped.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.mapped.mosdepth.summary.txt + - path: results/reports/mosdepth/test/test.mapped.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.mapped.regions.bed.gz + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + - path: 
results/reports/mosdepth/test/test.recal.regions.bed.gz - path: results/reports/samtools_stats/test/test.md.cram.stats - path: results/reports/samtools_stats/test/test.recal.cram.stats - path: results/variant_calling/test/strelka/test.genome.vcf.gz diff --git a/tests/test_pair.yml b/tests/test_pair.yml index 1df6c387e6..a6306bfc0f 100644 --- a/tests/test_pair.yml +++ b/tests/test_pair.yml @@ -22,13 +22,23 @@ - path: results/reports/fastqc/test2-test_L1 - path: results/reports/markduplicates/test/test.md.metrics - path: results/reports/markduplicates/test2/test2.md.metrics - - path: results/reports/qualimap/test/test.mapped - - path: results/reports/qualimap/test/test.recal - - path: results/reports/qualimap/test2/test2.mapped - - path: results/reports/qualimap/test2/test2.recal + - path: results/reports/mosdepth/test/test.mapped.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.mapped.mosdepth.summary.txt + - path: results/reports/mosdepth/test/test.mapped.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.mapped.regions.bed.gz + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz + - path: results/reports/mosdepth/test2/test2.mapped.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test2/test2.mapped.mosdepth.summary.txt + - path: results/reports/mosdepth/test2/test2.mapped.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test2/test2.mapped.regions.bed.gz + - path: results/reports/mosdepth/test2/test2.recal.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test2/test2.recal.mosdepth.summary.txt + - path: results/reports/mosdepth/test2/test2.recal.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test2/test2.recal.regions.bed.gz - path: 
results/reports/samtools_stats/test/test.md.cram.stats - path: results/reports/samtools_stats/test/test.recal.cram.stats - path: results/reports/samtools_stats/test2/test2.md.cram.stats - path: results/reports/samtools_stats/test2/test2.recal.cram.stats - - path: results/reports/deeptools/test/test.bigWig - - path: results/reports/deeptools/test2/test2.bigWig diff --git a/tests/test_prepare_recalibration.yml b/tests/test_prepare_recalibration.yml index 136cfa18eb..d6e0b4a752 100644 --- a/tests/test_prepare_recalibration.yml +++ b/tests/test_prepare_recalibration.yml @@ -10,7 +10,10 @@ - path: results/preprocessing/test/recalibrated/test.recal.cram - path: results/preprocessing/test/recalibrated/test.recal.cram.crai - path: results/csv/recalibrated.csv - - path: results/reports/qualimap/test/test.recal + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - path: results/reports/samtools_stats/test/test.recal.cram.stats - path: results/variant_calling/test/strelka/test.genome.vcf.gz - path: results/variant_calling/test/strelka/test.genome.vcf.gz.tbi @@ -29,7 +32,10 @@ - path: results/preprocessing/test/recalibrated/test.recal.cram - path: results/preprocessing/test/recalibrated/test.recal.cram.crai - path: results/csv/recalibrated.csv - - path: results/reports/qualimap/test/test.recal + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - path: results/reports/samtools_stats/test/test.recal.cram.stats - path: results/variant_calling/test/strelka/test.genome.vcf.gz - path: 
results/variant_calling/test/strelka/test.genome.vcf.gz.tbi @@ -54,7 +60,13 @@ should_exist: false - path: results/csv/recalibrated.csv should_exist: false - - path: results/reports/qualimap/test/test.recal + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + should_exist: false + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + should_exist: false + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + should_exist: false + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz should_exist: false - path: results/reports/samtools_stats/test/test.recal.cram.stats should_exist: false @@ -83,7 +95,13 @@ should_exist: false - path: results/csv/recalibrated.csv should_exist: false - - path: results/reports/qualimap/test/test.recal + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + should_exist: false + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + should_exist: false + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + should_exist: false + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz should_exist: false - path: results/reports/samtools_stats/test/test.recal.cram.stats should_exist: false diff --git a/tests/test_recalibrate.yml b/tests/test_recalibrate.yml index e8846781f3..47b470a00f 100644 --- a/tests/test_recalibrate.yml +++ b/tests/test_recalibrate.yml @@ -8,7 +8,10 @@ - path: results/multiqc - path: results/preprocessing/test/recalibrated/test.recal.cram - path: results/preprocessing/test/recalibrated/test.recal.cram.crai - - path: results/reports/qualimap/test/test.recal + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - path: 
results/reports/samtools_stats/test/test.recal.cram.stats - path: results/csv/recalibrated.csv - path: results/variant_calling/test/strelka/test.genome.vcf.gz @@ -26,7 +29,10 @@ - path: results/multiqc - path: results/preprocessing/test/recalibrated/test.recal.cram - path: results/preprocessing/test/recalibrated/test.recal.cram.crai - - path: results/reports/qualimap/test/test.recal + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - path: results/reports/samtools_stats/test/test.recal.cram.stats - path: results/csv/recalibrated.csv - path: results/variant_calling/test/strelka/test.genome.vcf.gz @@ -52,7 +58,13 @@ should_exist: false - path: results/csv/recalibrated.csv should_exist: false - - path: results/reports/qualimap/test/test.recal + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + should_exist: false + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + should_exist: false + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + should_exist: false + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz should_exist: false - path: results/reports/samtools_stats/test/test.recal.cram.stats should_exist: false @@ -81,7 +93,13 @@ should_exist: false - path: results/csv/recalibrated.csv should_exist: false - - path: results/reports/qualimap/test/test.recal + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + should_exist: false + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + should_exist: false + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + should_exist: false + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz should_exist: false - path: 
results/reports/samtools_stats/test/test.recal.cram.stats should_exist: false diff --git a/tests/test_save_bam_mapped.yml b/tests/test_save_bam_mapped.yml index f4e1787535..11368db0d0 100644 --- a/tests/test_save_bam_mapped.yml +++ b/tests/test_save_bam_mapped.yml @@ -17,7 +17,13 @@ - path: results/csv/markduplicates_no_table.csv - path: results/csv/recalibrated.csv - path: results/reports/fastqc/test-test_L1 - - path: results/reports/qualimap/test/test.mapped - - path: results/reports/qualimap/test/test.recal + - path: results/reports/mosdepth/test/test.mapped.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.mapped.mosdepth.summary.txt + - path: results/reports/mosdepth/test/test.mapped.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.mapped.regions.bed.gz + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - path: results/reports/samtools_stats/test/test.md.cram.stats - path: results/reports/samtools_stats/test/test.recal.cram.stats diff --git a/tests/test_save_output_bam.yml b/tests/test_save_output_bam.yml index 2cc9a27fac..4ae277002c 100644 --- a/tests/test_save_output_bam.yml +++ b/tests/test_save_output_bam.yml @@ -14,8 +14,14 @@ - path: results/csv/markduplicates_no_table.csv - path: results/csv/recalibrated.csv - path: results/reports/fastqc/test-test_L1 - - path: results/reports/qualimap/test/test.mapped - - path: results/reports/qualimap/test/test.recal + - path: results/reports/mosdepth/test/test.mapped.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.mapped.mosdepth.summary.txt + - path: results/reports/mosdepth/test/test.mapped.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.mapped.regions.bed.gz + - path: 
results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - path: results/reports/samtools_stats/test/test.md.cram.stats - path: results/reports/samtools_stats/test/test.recal.cram.stats @@ -35,7 +41,13 @@ - path: results/csv/markduplicates_no_table.csv - path: results/csv/recalibrated.csv - path: results/reports/fastqc/test-test_L1 - - path: results/reports/qualimap/test/test.mapped - - path: results/reports/qualimap/test/test.recal + - path: results/reports/mosdepth/test/test.mapped.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.mapped.mosdepth.summary.txt + - path: results/reports/mosdepth/test/test.mapped.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.mapped.regions.bed.gz + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - path: results/reports/samtools_stats/test/test.md.cram.stats - path: results/reports/samtools_stats/test/test.recal.cram.stats diff --git a/tests/test_skip_markduplicates.yml b/tests/test_skip_markduplicates.yml index 3b8802cfb8..ef5ef221f7 100644 --- a/tests/test_skip_markduplicates.yml +++ b/tests/test_skip_markduplicates.yml @@ -16,11 +16,16 @@ - path: results/csv/markduplicates_no_table.csv # TODO These are no actually duplicate marked crams, but just mapped converted crams TODO renaming - path: results/csv/recalibrated.csv - path: results/reports/fastqc/test-test_L1 - - path: results/reports/qualimap/test/test.mapped - - path: results/reports/qualimap/test/test.recal + - path: 
results/reports/mosdepth/test/test.mapped.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.mapped.mosdepth.summary.txt + - path: results/reports/mosdepth/test/test.mapped.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.mapped.regions.bed.gz + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - path: results/reports/samtools_stats/test/test.md.cram.stats - path: results/reports/samtools_stats/test/test.recal.cram.stats - - path: results/reports/deeptools/test/test.bigWig - path: results/variant_calling/test/strelka/test.genome.vcf.gz - path: results/variant_calling/test/strelka/test.genome.vcf.gz.tbi - path: results/variant_calling/test/strelka/test.variants.vcf.gz @@ -42,11 +47,16 @@ - path: results/csv/recalibrated.csv - path: results/csv/markduplicates.csv - path: results/csv/markduplicates_no_table.csv # TODO These are no actually duplicate marked crams, but just mapped converted crams TODO renaming - - path: results/reports/qualimap/test/test.mapped - - path: results/reports/qualimap/test/test.recal + - path: results/reports/mosdepth/test/test.mapped.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.mapped.mosdepth.summary.txt + - path: results/reports/mosdepth/test/test.mapped.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.mapped.regions.bed.gz + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - path: results/reports/samtools_stats/test/test.md.cram.stats - path: 
results/reports/samtools_stats/test/test.recal.cram.stats - - path: results/reports/deeptools/test/test.bigWig - path: results/variant_calling/test/strelka/test.genome.vcf.gz - path: results/variant_calling/test/strelka/test.genome.vcf.gz.tbi - path: results/variant_calling/test/strelka/test.variants.vcf.gz @@ -72,11 +82,16 @@ should_exist: false - path: results/csv/markduplicates_no_table.csv should_exist: false - - path: results/reports/qualimap/test/test.mapped - - path: results/reports/qualimap/test/test.recal + - path: results/reports/mosdepth/test/test.mapped.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.mapped.mosdepth.summary.txt + - path: results/reports/mosdepth/test/test.mapped.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.mapped.regions.bed.gz + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - path: results/reports/samtools_stats/test/test.paired_end.sorted.cram.stats - path: results/reports/samtools_stats/test/test.recal.cram.stats - - path: results/reports/deeptools/test/test.bigWig - path: results/variant_calling/test/strelka/test.genome.vcf.gz - path: results/variant_calling/test/strelka/test.genome.vcf.gz.tbi - path: results/variant_calling/test/strelka/test.variants.vcf.gz diff --git a/tests/test_split_fastq.yml b/tests/test_split_fastq.yml index 96bec90012..c00b9cb470 100644 --- a/tests/test_split_fastq.yml +++ b/tests/test_split_fastq.yml @@ -3,7 +3,7 @@ tags: - split_fastq files: - - path: results/preprocessing/test/seqkit/test-test_L1/test_1.part_001.fastq.gz - - path: results/preprocessing/test/seqkit/test-test_L1/test_1.part_002.fastq.gz - - path: results/preprocessing/test/seqkit/test-test_L1/test_2.part_001.fastq.gz - - path: 
results/preprocessing/test/seqkit/test-test_L1/test_2.part_002.fastq.gz + - path: results/preprocessing/test/fastp/0001.test-test_L1_1.fastp.fastq.gz + - path: results/preprocessing/test/fastp/0001.test-test_L1_2.fastp.fastq.gz + - path: results/preprocessing/test/fastp/0002.test-test_L1_1.fastp.fastq.gz + - path: results/preprocessing/test/fastp/0002.test-test_L1_2.fastp.fastq.gz diff --git a/tests/test_targeted.yml b/tests/test_targeted.yml index 0797b67680..da32e43673 100644 --- a/tests/test_targeted.yml +++ b/tests/test_targeted.yml @@ -15,8 +15,13 @@ - path: results/csv/recalibrated.csv - path: results/reports/fastqc/test-test_L1 - path: results/reports/markduplicates/test/test.md.metrics - - path: results/reports/qualimap/test/test.mapped - - path: results/reports/qualimap/test/test.recal + - path: results/reports/mosdepth/test/test.mapped.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.mapped.mosdepth.summary.txt + - path: results/reports/mosdepth/test/test.mapped.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.mapped.regions.bed.gz + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - path: results/reports/samtools_stats/test/test.md.cram.stats - path: results/reports/samtools_stats/test/test.recal.cram.stats - - path: results/reports/deeptools/test/test.bigWig diff --git a/tests/test_trimming.yml b/tests/test_trimming.yml new file mode 100644 index 0000000000..b00e19f81f --- /dev/null +++ b/tests/test_trimming.yml @@ -0,0 +1,29 @@ +- name: Run trimming pipeline + command: nextflow run main.nf -profile test,trimming,docker --save_trimmed + tags: + - trimming + - preprocessing + files: + - path: results/multiqc + - path: 
results/preprocessing/test/fastp/test-test_L1_1.fastp.fastq.gz + - path: results/preprocessing/test/fastp/test-test_L1_2.fastp.fastq.gz + - path: results/preprocessing/test/markduplicates/test.md.cram + - path: results/preprocessing/test/markduplicates/test.md.cram.crai + - path: results/preprocessing/test/recal_table/test.recal.table + - path: results/preprocessing/test/recalibrated/test.recal.cram + - path: results/preprocessing/test/recalibrated/test.recal.cram.crai + - path: results/csv/markduplicates.csv + - path: results/csv/markduplicates_no_table.csv + - path: results/csv/recalibrated.csv + - path: results/reports/fastqc/test-test_L1 + - path: results/reports/markduplicates/test/test.md.metrics + - path: results/reports/mosdepth/test/test.mapped.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.mapped.mosdepth.summary.txt + - path: results/reports/mosdepth/test/test.mapped.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.mapped.regions.bed.gz + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz + - path: results/reports/samtools_stats/test/test.md.cram.stats + - path: results/reports/samtools_stats/test/test.recal.cram.stats diff --git a/workflows/sarek.nf b/workflows/sarek.nf index f265f8eae5..184a0f99e6 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -53,7 +53,7 @@ for (param in checkPathParamList) if (param) file(param, checkIfExists: true) ch_input_sample = extract_csv(file(params.input, checkIfExists: true)) if (params.wes) { - if (params.intervals && !params.intervals.endsWith("bed")) exit 1, "Target file must be in BED format" + if (params.intervals && !params.intervals.endsWith("bed")) exit 1, "Target file specified with `--intervals` must be in BED format" } else 
{ if (params.intervals && !params.intervals.endsWith("bed") && !params.intervals.endsWith("interval_list")) exit 1, "Interval file must end with .bed or .interval_list" } @@ -165,14 +165,11 @@ include { PREPARE_INTERVALS } from '../subwor include { ALIGNMENT_TO_FASTQ as ALIGNMENT_TO_FASTQ_INPUT } from '../subworkflows/nf-core/alignment_to_fastq' include { ALIGNMENT_TO_FASTQ as ALIGNMENT_TO_FASTQ_UMI } from '../subworkflows/nf-core/alignment_to_fastq' -// Split FASTQ files -include { SPLIT_FASTQ } from '../subworkflows/local/split_fastq' - // Run FASTQC include { RUN_FASTQC } from '../subworkflows/nf-core/run_fastqc' -// Run TRIMGALORE -include { RUN_TRIMGALORE } from '../subworkflows/nf-core/run_trimgalore' +// TRIM/SPLIT FASTQ Files +include { FASTP } from '../modules/nf-core/modules/fastp/main' // Create umi consensus bams from fastq include { CREATE_UMI_CONSENSUS } from '../subworkflows/nf-core/fgbio_create_umi_consensus/main' @@ -294,13 +291,17 @@ workflow SAREK { PREPARE_INTERVALS(fasta_fai) // Intervals for speed up preprocessing/variant calling by spread/gather - intervals_bed_combined = (params.intervals && params.wes) ? Channel.fromPath(params.intervals).collect() : [] + // The previous WES-only assignment (kept commented out below) is not good: we need the combined bed for some tools that don't support scatter/gather. Why would we not use the same intervals for WGS? + // intervals_bed_combined = (params.intervals && params.wes) ? Channel.fromPath(params.intervals).collect() : [] + // TODO: check that this still works when the intervals file is in interval_list format + intervals_bed_combined = params.intervals ? Channel.fromPath(params.intervals).collect() : [] + //TODO: intervals also with WGS data? Probably need a parameter if WGS for deepvariant tool, that would allow to check here too + intervals_for_preprocessing = (params.wes && params.intervals) ?
intervals_bed_combined : [] intervals = PREPARE_INTERVALS.out.intervals_bed // [interval, num_intervals] multiple interval.bed files, divided by useful intervals for scatter/gather intervals_bed_gz_tbi = PREPARE_INTERVALS.out.intervals_bed_gz_tbi // [interval_bed, tbi, num_intervals] multiple interval.bed.gz/.tbi files, divided by useful intervals for scatter/gather - //TODO: intervals also with WGS data? Probably need a parameter if WGS for deepvariant tool, that would allow to check here too - intervals_for_preprocessing = (params.wes && !params.no_intervals) ? intervals_bed_combined : [] + // Gather used softwares versions ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions) @@ -338,24 +339,9 @@ workflow SAREK { ch_versions = ch_versions.mix(RUN_FASTQC.out.versions) } - // Trimming - if (params.trim_fastq) { - RUN_TRIMGALORE(ch_input_fastq) - - ch_reads = RUN_TRIMGALORE.out.reads - - ch_reports = ch_reports.mix(RUN_TRIMGALORE.out.trim_html.collect{it[1]}.ifEmpty([])) - ch_reports = ch_reports.mix(RUN_TRIMGALORE.out.trim_log.collect{it[1]}.ifEmpty([])) - ch_reports = ch_reports.mix(RUN_TRIMGALORE.out.trim_zip.collect{it[1]}.ifEmpty([])) - - ch_versions = ch_versions.mix(RUN_TRIMGALORE.out.versions) - } else { - ch_reads = ch_input_fastq - } - // UMI consensus calling if (params.umi_read_structure) { - CREATE_UMI_CONSENSUS(ch_reads, + CREATE_UMI_CONSENSUS(ch_input_fastq, fasta, ch_map_index, umi_read_structure, @@ -364,25 +350,35 @@ workflow SAREK { // convert back to fastq for further preprocessing ALIGNMENT_TO_FASTQ_UMI(CREATE_UMI_CONSENSUS.out.consensusbam, []) - ch_input_sample_to_split = ALIGNMENT_TO_FASTQ_UMI.out.reads + ch_reads_fastp = ALIGNMENT_TO_FASTQ_UMI.out.reads // Gather used softwares versions ch_versions = ch_versions.mix(ALIGNMENT_TO_FASTQ_UMI.out.versions) ch_versions = ch_versions.mix(CREATE_UMI_CONSENSUS.out.versions) } else { - ch_input_sample_to_split = ch_reads + ch_reads_fastp = ch_input_fastq } - // SPLIT OF FASTQ FILES WITH 
SEQKIT_SPLIT2 - if (params.split_fastq > 1) { - SPLIT_FASTQ(ch_input_sample_to_split) + // Trimming and/or splitting: FASTP runs when either is requested, otherwise the reads are passed through unchanged + if (params.trim_fastq || params.split_fastq > 0) { + FASTP(ch_reads_fastp, false, false) - ch_reads_to_map = SPLIT_FASTQ.out.reads + ch_reports = ch_reports.mix(FASTP.out.json.collect{it[1]}.ifEmpty([]),FASTP.out.html.collect{it[1]}.ifEmpty([])) - // Gather used softwares versions - ch_versions = ch_versions.mix(SPLIT_FASTQ.out.versions) + if (params.split_fastq > 0) { + ch_reads_to_map = FASTP.out.reads.map{ key, reads -> + // Order the files by the part of their name before the first '.', group them two by two and store the number of groups under `size`; `def` keeps read_files local to this closure (presumably each group is one R1/R2 chunk, TODO confirm against the fastp output naming) + def read_files = reads.sort{ a,b -> a.getName().tokenize('.')[0] <=> b.getName().tokenize('.')[0] }.collate(2) + [[patient: key.patient, sample:key.sample, gender:key.gender, status:key.status, id:key.id, numLanes:key.numLanes, read_group:key.read_group, data_type:key.data_type, size:read_files.size()], + read_files] + }.transpose() + }else{ + ch_reads_to_map = FASTP.out.reads + } + // Gather used software versions + ch_versions = ch_versions.mix(FASTP.out.versions) } else { - ch_reads_to_map = ch_input_sample_to_split + ch_reads_to_map = ch_reads_fastp } // STEP 1: MAPPING READS TO REFERENCE GENOME