diff --git a/main.nf b/main.nf index b1867fdba..13e5b8514 100755 --- a/main.nf +++ b/main.nf @@ -166,21 +166,7 @@ workflow NFCORE_RNASEQ { trim_unpaired = RNASEQ.out.trim_unpaired umi_log = RNASEQ.out.umi_log umi_reads = RNASEQ.out.umi_reads - umi_genomic_dedup_log = RNASEQ.out.umi_genomic_dedup_log - umi_transcriptomic_dedup_log = RNASEQ.out.umi_transcriptomic_dedup_log - umi_prepare_for_rsem_log = RNASEQ.out.umi_prepare_for_rsem_log - umi_transcriptome_dedup_bam = RNASEQ.out.umi_transcriptome_dedup_bam - umi_transcriptome_sorted_bam = RNASEQ.out.umi_transcriptome_sorted_bam - umi_transcriptome_sorted_bam_bai = RNASEQ.out.umi_transcriptome_sorted_bam_bai - umi_transcriptome_filtered_bam = RNASEQ.out.umi_transcriptome_filtered_bam - umi_dedup_stats = RNASEQ.out.umi_dedup_stats - umi_dedup_bam = RNASEQ.out.umi_dedup_bam - umi_dedup_bai = RNASEQ.out.umi_dedup_bai - umi_dedup_flagstat = RNASEQ.out.umi_dedup_flagstat - umi_dedup_idxstats = RNASEQ.out.umi_dedup_idxstats - umi_dedup_tsv_edit_distance = RNASEQ.out.umi_dedup_tsv_edit_distance - umi_dedup_tsv_per_umi = RNASEQ.out.umi_dedup_tsv_per_umi - umi_dedup_tsv_umi_per_position = RNASEQ.out.umi_dedup_tsv_umi_per_position + umi_dedup = RNASEQ.out.umi_dedup lint_log_raw = RNASEQ.out.lint_log_raw lint_log_trimmed = RNASEQ.out.lint_log_trimmed lint_log_bbsplit = RNASEQ.out.lint_log_bbsplit @@ -195,61 +181,24 @@ workflow NFCORE_RNASEQ { seqkit_converted = RNASEQ.out.seqkit_converted // Alignment outputs - star_log = RNASEQ.out.star_log - star_log_out = RNASEQ.out.star_log_out - star_log_progress = RNASEQ.out.star_log_progress - star_tab = RNASEQ.out.star_tab - star_bam = RNASEQ.out.star_bam - star_bai = RNASEQ.out.star_bai - sorted_bam_stats = RNASEQ.out.sorted_bam_stats - sorted_bam_flagstat = RNASEQ.out.sorted_bam_flagstat - sorted_bam_idxstats = RNASEQ.out.sorted_bam_idxstats + star = RNASEQ.out.star + samtools = RNASEQ.out.samtools transcriptome_bam = RNASEQ.out.transcriptome_bam unaligned_sequences = 
RNASEQ.out.unaligned_sequences hisat2_summary = RNASEQ.out.hisat2_summary samtools_bai = RNASEQ.out.samtools_bai // MarkDuplicates outputs - markdup_bam = RNASEQ.out.markdup_bam - markdup_bai = RNASEQ.out.markdup_bai - markdup_metrics = RNASEQ.out.markdup_metrics - markdup_stats = RNASEQ.out.markdup_stats - markdup_flagstat = RNASEQ.out.markdup_flagstat - markdup_idxstats = RNASEQ.out.markdup_idxstats + markdup = RNASEQ.out.markdup // QC outputs preseq_txt = RNASEQ.out.preseq_txt preseq_log = RNASEQ.out.preseq_log qualimap_results = RNASEQ.out.qualimap_results - dupradar_scatter = RNASEQ.out.dupradar_scatter - dupradar_boxplot = RNASEQ.out.dupradar_boxplot - dupradar_histogram = RNASEQ.out.dupradar_histogram - dupradar_gene_data = RNASEQ.out.dupradar_gene_data - dupradar_intercept = RNASEQ.out.dupradar_intercept + dupradar = RNASEQ.out.dupradar // RSeQC outputs - rseqc_bamstat = RNASEQ.out.rseqc_bamstat - rseqc_inferexperiment = RNASEQ.out.rseqc_inferexperiment - rseqc_junctionannotation_bed = RNASEQ.out.rseqc_junctionannotation_bed - rseqc_junctionannotation_interact_bed = RNASEQ.out.rseqc_junctionannotation_interact_bed - rseqc_junctionannotation_xls = RNASEQ.out.rseqc_junctionannotation_xls - rseqc_junctionannotation_log = RNASEQ.out.rseqc_junctionannotation_log - rseqc_junctionannotation_pdf = RNASEQ.out.rseqc_junctionannotation_pdf - rseqc_junctionannotation_events_pdf = RNASEQ.out.rseqc_junctionannotation_events_pdf - rseqc_junctionannotation_r = RNASEQ.out.rseqc_junctionannotation_r - rseqc_junctionsaturation_pdf = RNASEQ.out.rseqc_junctionsaturation_pdf - rseqc_junctionsaturation_r = RNASEQ.out.rseqc_junctionsaturation_r - rseqc_readduplication_pos_xls = RNASEQ.out.rseqc_readduplication_pos_xls - rseqc_readduplication_seq_xls = RNASEQ.out.rseqc_readduplication_seq_xls - rseqc_readduplication_pdf = RNASEQ.out.rseqc_readduplication_pdf - rseqc_readduplication_r = RNASEQ.out.rseqc_readduplication_r - rseqc_readdistribution = RNASEQ.out.rseqc_readdistribution - 
rseqc_innerdistance_txt = RNASEQ.out.rseqc_innerdistance_txt - rseqc_innerdistance_distance = RNASEQ.out.rseqc_innerdistance_distance - rseqc_innerdistance_mean = RNASEQ.out.rseqc_innerdistance_mean - rseqc_innerdistance_pdf = RNASEQ.out.rseqc_innerdistance_pdf - rseqc_innerdistance_r = RNASEQ.out.rseqc_innerdistance_r - rseqc_tin = RNASEQ.out.rseqc_tin + rseqc = RNASEQ.out.rseqc // Contaminant screening outputs kraken_report = RNASEQ.out.kraken_report @@ -261,12 +210,11 @@ workflow NFCORE_RNASEQ { stringtie_outputs = RNASEQ.out.stringtie_outputs featurecounts_outputs = RNASEQ.out.featurecounts_outputs bigwig_outputs = RNASEQ.out.bigwig_outputs - pseudo_outputs = RNASEQ.out.pseudo_outputs - rsem_logs = RNASEQ.out.rsem_logs - rsem_results = RNASEQ.out.rsem_results - star_salmon_outputs = RNASEQ.out.star_salmon_outputs - deseq2_outputs = RNASEQ.out.deseq2_outputs - pseudo_deseq2_outputs = RNASEQ.out.pseudo_deseq2_outputs + pseudo = RNASEQ.out.pseudo + rsem = RNASEQ.out.rsem + star_salmon = RNASEQ.out.star_salmon + deseq2 = RNASEQ.out.deseq2 + pseudo_deseq2 = RNASEQ.out.pseudo_deseq2 // MultiQC outputs multiqc_data = RNASEQ.out.multiqc_data @@ -350,21 +298,24 @@ workflow { trim_unpaired = NFCORE_RNASEQ.out.trim_unpaired.ifEmpty([]) umi_log = NFCORE_RNASEQ.out.umi_log.ifEmpty([]) umi_reads = NFCORE_RNASEQ.out.umi_reads.ifEmpty([]) - umi_genomic_dedup_log = NFCORE_RNASEQ.out.umi_genomic_dedup_log.ifEmpty([]) - umi_transcriptomic_dedup_log = NFCORE_RNASEQ.out.umi_transcriptomic_dedup_log.ifEmpty([]) - umi_prepare_for_rsem_log = NFCORE_RNASEQ.out.umi_prepare_for_rsem_log.ifEmpty([]) - umi_transcriptome_dedup_bam = NFCORE_RNASEQ.out.umi_transcriptome_dedup_bam.ifEmpty([]) - umi_transcriptome_sorted_bam = NFCORE_RNASEQ.out.umi_transcriptome_sorted_bam.ifEmpty([]) - umi_transcriptome_sorted_bam_bai = NFCORE_RNASEQ.out.umi_transcriptome_sorted_bam_bai.ifEmpty([]) - umi_transcriptome_filtered_bam = NFCORE_RNASEQ.out.umi_transcriptome_filtered_bam.ifEmpty([]) - umi_dedup_stats 
= NFCORE_RNASEQ.out.umi_dedup_stats.ifEmpty([]) - umi_dedup_bam = NFCORE_RNASEQ.out.umi_dedup_bam.ifEmpty([]) - umi_dedup_bai = NFCORE_RNASEQ.out.umi_dedup_bai.ifEmpty([]) - umi_dedup_flagstat = NFCORE_RNASEQ.out.umi_dedup_flagstat.ifEmpty([]) - umi_dedup_idxstats = NFCORE_RNASEQ.out.umi_dedup_idxstats.ifEmpty([]) - umi_dedup_tsv_edit_distance = NFCORE_RNASEQ.out.umi_dedup_tsv_edit_distance.ifEmpty([]) - umi_dedup_tsv_per_umi = NFCORE_RNASEQ.out.umi_dedup_tsv_per_umi.ifEmpty([]) - umi_dedup_tsv_umi_per_position = NFCORE_RNASEQ.out.umi_dedup_tsv_umi_per_position.ifEmpty([]) + umi_genomic_dedup_log = NFCORE_RNASEQ.out.umi_dedup.map { r -> [r.meta, r.genomic_dedup_log] }.ifEmpty([]) + umi_transcriptomic_dedup_log = NFCORE_RNASEQ.out.umi_dedup.map { r -> [r.meta, r.transcriptomic_dedup_log] }.ifEmpty([]) + umi_prepare_for_rsem_log = NFCORE_RNASEQ.out.umi_dedup.map { r -> [r.meta, r.prepare_for_rsem_log] }.ifEmpty([]) + umi_transcriptome_dedup_bam = NFCORE_RNASEQ.out.umi_dedup.map { r -> [r.meta, r.transcriptome_dedup_bam] }.ifEmpty([]) + umi_transcriptome_sorted_bam = NFCORE_RNASEQ.out.umi_dedup.map { r -> [r.meta, r.transcriptome_sorted_bam] }.ifEmpty([]) + umi_transcriptome_sorted_bam_bai = NFCORE_RNASEQ.out.umi_dedup.map { r -> [r.meta, r.transcriptome_sorted_bam_bai] }.ifEmpty([]) + umi_transcriptome_filtered_bam = NFCORE_RNASEQ.out.umi_dedup.map { r -> [r.meta, r.transcriptome_filtered_bam] }.ifEmpty([]) + umi_dedup_genome_stats = NFCORE_RNASEQ.out.umi_dedup.map { r -> [r.meta, r.genome_stats] }.ifEmpty([]) + umi_dedup_genome_flagstat = NFCORE_RNASEQ.out.umi_dedup.map { r -> [r.meta, r.genome_flagstat] }.ifEmpty([]) + umi_dedup_genome_idxstats = NFCORE_RNASEQ.out.umi_dedup.map { r -> [r.meta, r.genome_idxstats] }.ifEmpty([]) + umi_dedup_transcriptome_stats = NFCORE_RNASEQ.out.umi_dedup.map { r -> [r.meta, r.transcriptome_stats] }.ifEmpty([]) + umi_dedup_transcriptome_flagstat = NFCORE_RNASEQ.out.umi_dedup.map { r -> [r.meta, r.transcriptome_flagstat] }.ifEmpty([]) 
+ umi_dedup_transcriptome_idxstats = NFCORE_RNASEQ.out.umi_dedup.map { r -> [r.meta, r.transcriptome_idxstats] }.ifEmpty([]) + umi_dedup_bam = NFCORE_RNASEQ.out.umi_dedup.map { r -> [r.meta, r.bam] }.ifEmpty([]) + umi_dedup_bai = NFCORE_RNASEQ.out.umi_dedup.map { r -> [r.meta, r.bai] }.ifEmpty([]) + umi_dedup_tsv_edit_distance = NFCORE_RNASEQ.out.umi_dedup.map { r -> [r.meta, r.tsv_edit_distance] }.ifEmpty([]) + umi_dedup_tsv_per_umi = NFCORE_RNASEQ.out.umi_dedup.map { r -> [r.meta, r.tsv_per_umi] }.ifEmpty([]) + umi_dedup_tsv_umi_per_position = NFCORE_RNASEQ.out.umi_dedup.map { r -> [r.meta, r.tsv_umi_per_position] }.ifEmpty([]) lint_log_raw = NFCORE_RNASEQ.out.lint_log_raw.ifEmpty([]) lint_log_trimmed = NFCORE_RNASEQ.out.lint_log_trimmed.ifEmpty([]) lint_log_bbsplit = NFCORE_RNASEQ.out.lint_log_bbsplit.ifEmpty([]) @@ -378,62 +329,62 @@ workflow { seqkit_prefixed = NFCORE_RNASEQ.out.seqkit_prefixed.ifEmpty([]) seqkit_converted = NFCORE_RNASEQ.out.seqkit_converted.ifEmpty([]) - // Alignment outputs - star_log = NFCORE_RNASEQ.out.star_log.ifEmpty([]) - star_log_out = NFCORE_RNASEQ.out.star_log_out.ifEmpty([]) - star_log_progress = NFCORE_RNASEQ.out.star_log_progress.ifEmpty([]) - star_tab = NFCORE_RNASEQ.out.star_tab.ifEmpty([]) - star_bam = NFCORE_RNASEQ.out.star_bam.ifEmpty([]) - star_bai = NFCORE_RNASEQ.out.star_bai.ifEmpty([]) - sorted_bam_stats = NFCORE_RNASEQ.out.sorted_bam_stats.ifEmpty([]) - sorted_bam_flagstat = NFCORE_RNASEQ.out.sorted_bam_flagstat.ifEmpty([]) - sorted_bam_idxstats = NFCORE_RNASEQ.out.sorted_bam_idxstats.ifEmpty([]) + // Alignment outputs - extract from StarAlignResult and SamtoolsResult records + star_log = NFCORE_RNASEQ.out.star.map { r -> [r.meta, r.log_final] }.ifEmpty([]) + star_log_out = NFCORE_RNASEQ.out.star.map { r -> [r.meta, r.log_out] }.ifEmpty([]) + star_log_progress = NFCORE_RNASEQ.out.star.map { r -> [r.meta, r.log_progress] }.ifEmpty([]) + star_tab = NFCORE_RNASEQ.out.star.map { r -> [r.meta, r.tab] }.ifEmpty([]) + star_bam 
= NFCORE_RNASEQ.out.star.map { r -> [r.meta, r.bam] }.ifEmpty([]) + star_bai = NFCORE_RNASEQ.out.samtools.map { r -> [r.meta, r.bai] }.ifEmpty([]) + sorted_bam_stats = NFCORE_RNASEQ.out.samtools.map { r -> [r.meta, r.stats] }.ifEmpty([]) + sorted_bam_flagstat = NFCORE_RNASEQ.out.samtools.map { r -> [r.meta, r.flagstat] }.ifEmpty([]) + sorted_bam_idxstats = NFCORE_RNASEQ.out.samtools.map { r -> [r.meta, r.idxstats] }.ifEmpty([]) transcriptome_bam = NFCORE_RNASEQ.out.transcriptome_bam.ifEmpty([]) unaligned_sequences = NFCORE_RNASEQ.out.unaligned_sequences.ifEmpty([]) hisat2_summary = NFCORE_RNASEQ.out.hisat2_summary.ifEmpty([]) samtools_bai = NFCORE_RNASEQ.out.samtools_bai.ifEmpty([]) - // MarkDuplicates outputs - markdup_bam = NFCORE_RNASEQ.out.markdup_bam.ifEmpty([]) - markdup_bai = NFCORE_RNASEQ.out.markdup_bai.ifEmpty([]) - markdup_metrics = NFCORE_RNASEQ.out.markdup_metrics.ifEmpty([]) - markdup_stats = NFCORE_RNASEQ.out.markdup_stats.ifEmpty([]) - markdup_flagstat = NFCORE_RNASEQ.out.markdup_flagstat.ifEmpty([]) - markdup_idxstats = NFCORE_RNASEQ.out.markdup_idxstats.ifEmpty([]) + // MarkDuplicates outputs - extract from MarkDupResult record + markdup_bam = NFCORE_RNASEQ.out.markdup.map { r -> [r.meta, r.bam] }.ifEmpty([]) + markdup_bai = NFCORE_RNASEQ.out.markdup.map { r -> [r.meta, r.bai] }.ifEmpty([]) + markdup_metrics = NFCORE_RNASEQ.out.markdup.map { r -> [r.meta, r.metrics] }.ifEmpty([]) + markdup_stats = NFCORE_RNASEQ.out.markdup.map { r -> [r.meta, r.stats] }.ifEmpty([]) + markdup_flagstat = NFCORE_RNASEQ.out.markdup.map { r -> [r.meta, r.flagstat] }.ifEmpty([]) + markdup_idxstats = NFCORE_RNASEQ.out.markdup.map { r -> [r.meta, r.idxstats] }.ifEmpty([]) // QC outputs preseq_txt = NFCORE_RNASEQ.out.preseq_txt.ifEmpty([]) preseq_log = NFCORE_RNASEQ.out.preseq_log.ifEmpty([]) qualimap_results = NFCORE_RNASEQ.out.qualimap_results.ifEmpty([]) - dupradar_scatter = NFCORE_RNASEQ.out.dupradar_scatter.ifEmpty([]) - dupradar_boxplot = 
NFCORE_RNASEQ.out.dupradar_boxplot.ifEmpty([]) - dupradar_histogram = NFCORE_RNASEQ.out.dupradar_histogram.ifEmpty([]) - dupradar_gene_data = NFCORE_RNASEQ.out.dupradar_gene_data.ifEmpty([]) - dupradar_intercept = NFCORE_RNASEQ.out.dupradar_intercept.ifEmpty([]) - - // RSeQC outputs - rseqc_bamstat = NFCORE_RNASEQ.out.rseqc_bamstat.ifEmpty([]) - rseqc_inferexperiment = NFCORE_RNASEQ.out.rseqc_inferexperiment.ifEmpty([]) - rseqc_junctionannotation_bed = NFCORE_RNASEQ.out.rseqc_junctionannotation_bed.ifEmpty([]) - rseqc_junctionannotation_interact_bed = NFCORE_RNASEQ.out.rseqc_junctionannotation_interact_bed.ifEmpty([]) - rseqc_junctionannotation_xls = NFCORE_RNASEQ.out.rseqc_junctionannotation_xls.ifEmpty([]) - rseqc_junctionannotation_log = NFCORE_RNASEQ.out.rseqc_junctionannotation_log.ifEmpty([]) - rseqc_junctionannotation_pdf = NFCORE_RNASEQ.out.rseqc_junctionannotation_pdf.ifEmpty([]) - rseqc_junctionannotation_events_pdf = NFCORE_RNASEQ.out.rseqc_junctionannotation_events_pdf.ifEmpty([]) - rseqc_junctionannotation_r = NFCORE_RNASEQ.out.rseqc_junctionannotation_r.ifEmpty([]) - rseqc_junctionsaturation_pdf = NFCORE_RNASEQ.out.rseqc_junctionsaturation_pdf.ifEmpty([]) - rseqc_junctionsaturation_r = NFCORE_RNASEQ.out.rseqc_junctionsaturation_r.ifEmpty([]) - rseqc_readduplication_pos_xls = NFCORE_RNASEQ.out.rseqc_readduplication_pos_xls.ifEmpty([]) - rseqc_readduplication_seq_xls = NFCORE_RNASEQ.out.rseqc_readduplication_seq_xls.ifEmpty([]) - rseqc_readduplication_pdf = NFCORE_RNASEQ.out.rseqc_readduplication_pdf.ifEmpty([]) - rseqc_readduplication_r = NFCORE_RNASEQ.out.rseqc_readduplication_r.ifEmpty([]) - rseqc_readdistribution = NFCORE_RNASEQ.out.rseqc_readdistribution.ifEmpty([]) - rseqc_innerdistance_txt = NFCORE_RNASEQ.out.rseqc_innerdistance_txt.ifEmpty([]) - rseqc_innerdistance_distance = NFCORE_RNASEQ.out.rseqc_innerdistance_distance.ifEmpty([]) - rseqc_innerdistance_mean = NFCORE_RNASEQ.out.rseqc_innerdistance_mean.ifEmpty([]) - rseqc_innerdistance_pdf = 
NFCORE_RNASEQ.out.rseqc_innerdistance_pdf.ifEmpty([]) - rseqc_innerdistance_r = NFCORE_RNASEQ.out.rseqc_innerdistance_r.ifEmpty([]) - rseqc_tin = NFCORE_RNASEQ.out.rseqc_tin.ifEmpty([]) + dupradar_scatter = NFCORE_RNASEQ.out.dupradar.map { r -> [r.meta, r.scatter] }.ifEmpty([]) + dupradar_boxplot = NFCORE_RNASEQ.out.dupradar.map { r -> [r.meta, r.boxplot] }.ifEmpty([]) + dupradar_histogram = NFCORE_RNASEQ.out.dupradar.map { r -> [r.meta, r.histogram] }.ifEmpty([]) + dupradar_gene_data = NFCORE_RNASEQ.out.dupradar.map { r -> [r.meta, r.gene_data] }.ifEmpty([]) + dupradar_intercept = NFCORE_RNASEQ.out.dupradar.map { r -> [r.meta, r.intercept] }.ifEmpty([]) + + // RSeQC outputs - use .map to extract fields; null-safe ?. for nested records + rseqc_bamstat = NFCORE_RNASEQ.out.rseqc.map { r -> [r.meta, r.bamstat] }.ifEmpty([]) + rseqc_inferexperiment = NFCORE_RNASEQ.out.rseqc.map { r -> [r.meta, r.inferexperiment] }.ifEmpty([]) + rseqc_junctionannotation_bed = NFCORE_RNASEQ.out.rseqc.map { r -> [r.meta, r.junction_annotation?.bed] }.ifEmpty([]) + rseqc_junctionannotation_interact_bed = NFCORE_RNASEQ.out.rseqc.map { r -> [r.meta, r.junction_annotation?.interact_bed] }.ifEmpty([]) + rseqc_junctionannotation_xls = NFCORE_RNASEQ.out.rseqc.map { r -> [r.meta, r.junction_annotation?.xls] }.ifEmpty([]) + rseqc_junctionannotation_log = NFCORE_RNASEQ.out.rseqc.map { r -> [r.meta, r.junction_annotation?.log] }.ifEmpty([]) + rseqc_junctionannotation_pdf = NFCORE_RNASEQ.out.rseqc.map { r -> [r.meta, r.junction_annotation?.pdf] }.ifEmpty([]) + rseqc_junctionannotation_events_pdf = NFCORE_RNASEQ.out.rseqc.map { r -> [r.meta, r.junction_annotation?.events_pdf] }.ifEmpty([]) + rseqc_junctionannotation_r = NFCORE_RNASEQ.out.rseqc.map { r -> [r.meta, r.junction_annotation?.rscript] }.ifEmpty([]) + rseqc_junctionsaturation_pdf = NFCORE_RNASEQ.out.rseqc.map { r -> [r.meta, r.junctionsaturation_pdf] }.ifEmpty([]) + rseqc_junctionsaturation_r = NFCORE_RNASEQ.out.rseqc.map { r -> [r.meta, 
r.junctionsaturation_r] }.ifEmpty([]) + rseqc_readduplication_pos_xls = NFCORE_RNASEQ.out.rseqc.map { r -> [r.meta, r.read_duplication?.pos_xls] }.ifEmpty([]) + rseqc_readduplication_seq_xls = NFCORE_RNASEQ.out.rseqc.map { r -> [r.meta, r.read_duplication?.seq_xls] }.ifEmpty([]) + rseqc_readduplication_pdf = NFCORE_RNASEQ.out.rseqc.map { r -> [r.meta, r.read_duplication?.pdf] }.ifEmpty([]) + rseqc_readduplication_r = NFCORE_RNASEQ.out.rseqc.map { r -> [r.meta, r.read_duplication?.rscript] }.ifEmpty([]) + rseqc_readdistribution = NFCORE_RNASEQ.out.rseqc.map { r -> [r.meta, r.readdistribution] }.ifEmpty([]) + rseqc_innerdistance_txt = NFCORE_RNASEQ.out.rseqc.map { r -> [r.meta, r.inner_distance?.freq] }.ifEmpty([]) + rseqc_innerdistance_distance = NFCORE_RNASEQ.out.rseqc.map { r -> [r.meta, r.inner_distance?.distance] }.ifEmpty([]) + rseqc_innerdistance_mean = NFCORE_RNASEQ.out.rseqc.map { r -> [r.meta, r.inner_distance?.mean] }.ifEmpty([]) + rseqc_innerdistance_pdf = NFCORE_RNASEQ.out.rseqc.map { r -> [r.meta, r.inner_distance?.pdf] }.ifEmpty([]) + rseqc_innerdistance_r = NFCORE_RNASEQ.out.rseqc.map { r -> [r.meta, r.inner_distance?.rscript] }.ifEmpty([]) + rseqc_tin = NFCORE_RNASEQ.out.rseqc.map { r -> [r.meta, r.tin] }.ifEmpty([]) // Contaminant screening outputs kraken_report = NFCORE_RNASEQ.out.kraken_report.ifEmpty([]) @@ -441,16 +392,15 @@ workflow { sylph_profile = NFCORE_RNASEQ.out.sylph_profile.ifEmpty([]) sylphtax_output = NFCORE_RNASEQ.out.sylphtax_output.ifEmpty([]) - // Consolidated outputs + // Consolidated outputs - records publish all fields to their respective directories stringtie_outputs = NFCORE_RNASEQ.out.stringtie_outputs.ifEmpty([]) featurecounts_outputs = NFCORE_RNASEQ.out.featurecounts_outputs.ifEmpty([]) bigwig_outputs = NFCORE_RNASEQ.out.bigwig_outputs.ifEmpty([]) - pseudo_outputs = NFCORE_RNASEQ.out.pseudo_outputs.ifEmpty([]) - rsem_logs = NFCORE_RNASEQ.out.rsem_logs.ifEmpty([]) - rsem_results = NFCORE_RNASEQ.out.rsem_results.ifEmpty([]) 
- star_salmon_outputs = NFCORE_RNASEQ.out.star_salmon_outputs.ifEmpty([]) - deseq2_outputs = NFCORE_RNASEQ.out.deseq2_outputs.ifEmpty([]) - pseudo_deseq2_outputs = NFCORE_RNASEQ.out.pseudo_deseq2_outputs.ifEmpty([]) + pseudo = NFCORE_RNASEQ.out.pseudo.ifEmpty([]) + rsem = NFCORE_RNASEQ.out.rsem.ifEmpty([]) + star_salmon = NFCORE_RNASEQ.out.star_salmon.ifEmpty([]) + deseq2 = NFCORE_RNASEQ.out.deseq2.ifEmpty([]) + pseudo_deseq2 = NFCORE_RNASEQ.out.pseudo_deseq2.ifEmpty([]) // MultiQC report multiqc_report = NFCORE_RNASEQ.out.multiqc_report.ifEmpty([]) @@ -505,11 +455,14 @@ output { umi_transcriptome_sorted_bam { path { params.aligner } } umi_transcriptome_sorted_bam_bai { path { params.aligner } } umi_transcriptome_filtered_bam { path { params.aligner } } - umi_dedup_stats { path { "${params.aligner}/samtools_stats" } } + umi_dedup_genome_stats { path { "${params.aligner}/samtools_stats" } } + umi_dedup_genome_flagstat { path { "${params.aligner}/samtools_stats" } } + umi_dedup_genome_idxstats { path { "${params.aligner}/samtools_stats" } } + umi_dedup_transcriptome_stats { path { "${params.aligner}/samtools_stats" } } + umi_dedup_transcriptome_flagstat { path { "${params.aligner}/samtools_stats" } } + umi_dedup_transcriptome_idxstats { path { "${params.aligner}/samtools_stats" } } umi_dedup_bam { path { params.aligner } } umi_dedup_bai { path { params.aligner } } - umi_dedup_flagstat { path { "${params.aligner}/samtools_stats" } } - umi_dedup_idxstats { path { "${params.aligner}/samtools_stats" } } umi_dedup_tsv_edit_distance { path { "${params.aligner}/${params.umi_dedup_tool}" } } umi_dedup_tsv_per_umi { path { "${params.aligner}/${params.umi_dedup_tool}" } } umi_dedup_tsv_umi_per_position { path { "${params.aligner}/${params.umi_dedup_tool}" } } @@ -589,18 +542,15 @@ output { sylph_profile { path { "${params.aligner}/contaminants/sylph" } } sylphtax_output { path { "${params.aligner}/contaminants/sylph" } } - // Consolidated outputs + // Consolidated outputs - 
record channels publish all fields to one directory stringtie_outputs { path { "${params.aligner}/stringtie" } } featurecounts_outputs { path { "${params.aligner}/featurecounts" } } bigwig_outputs { path { "${params.aligner}/bigwig" } } - pseudo_outputs { path { params.pseudo_aligner } } - star_salmon_outputs { path 'star_salmon' } - deseq2_outputs { path { "${params.aligner}/deseq2_qc" } } - pseudo_deseq2_outputs { path { "${params.pseudo_aligner}/deseq2_qc" } } - - // RSEM outputs - rsem_logs { path 'star_rsem/log' } - rsem_results { path 'star_rsem' } + pseudo { path { params.pseudo_aligner } } + star_salmon { path 'star_salmon' } + deseq2 { path { "${params.aligner}/deseq2_qc" } } + pseudo_deseq2 { path { "${params.pseudo_aligner}/deseq2_qc" } } + rsem { path 'star_rsem' } // MultiQC report multiqc_report { path { params.skip_alignment ? 'multiqc' : "multiqc/${params.aligner}" } } diff --git a/modules/local/deseq2_qc/main.nf b/modules/local/deseq2_qc/main.nf index 193c43351..ef3c06dad 100644 --- a/modules/local/deseq2_qc/main.nf +++ b/modules/local/deseq2_qc/main.nf @@ -1,3 +1,16 @@ +nextflow.preview.types = true + +record DeSeq2Result { + pdf: Path? + rdata: Path? + pca_txt: Path? + pca_multiqc: Path? + dists_txt: Path? + dists_multiqc: Path? + log: Path? + size_factors: Path? 
+} + process DESEQ2_QC { label "process_medium" @@ -9,20 +22,23 @@ process DESEQ2_QC { 'community.wave.seqera.io/library/r-base_r-optparse_r-ggplot2_r-rcolorbrewer_pruned:9e75394d0bc21987' }" input: - path counts - path pca_header_multiqc - path clustering_header_multiqc + counts: Path + pca_header_multiqc: Path + clustering_header_multiqc: Path output: - path "*.pdf" , optional:true, emit: pdf - path "*.RData" , optional:true, emit: rdata - path "*pca.vals.txt" , optional:true, emit: pca_txt - path "*pca.vals_mqc.tsv" , optional:true, emit: pca_multiqc - path "*sample.dists.txt" , optional:true, emit: dists_txt - path "*sample.dists_mqc.tsv", optional:true, emit: dists_multiqc - path "*.log" , optional:true, emit: log - path "size_factors" , optional:true, emit: size_factors - path "versions.yml" , emit: versions + record( + pdf: file("*.pdf", optional: true), + rdata: file("*.RData", optional: true), + pca_txt: file("*pca.vals.txt", optional: true), + pca_multiqc: file("*pca.vals_mqc.tsv", optional: true), + dists_txt: file("*sample.dists.txt", optional: true), + dists_multiqc: file("*sample.dists_mqc.tsv", optional: true), + log: file("*.log", optional: true), + size_factors: file("size_factors", optional: true) + ) + tuple val("${task.process}"), val('r-base'), eval('echo $(R --version 2>&1) | sed "s/^.*R version //; s/ .*$//"'), topic: versions + tuple val("${task.process}"), val('bioconductor-deseq2'), eval('Rscript -e "library(DESeq2); cat(as.character(packageVersion(\'DESeq2\')))"'), topic: versions when: task.ext.when == null || task.ext.when @@ -54,12 +70,6 @@ process DESEQ2_QC { cat clustering_header.tmp *.sample.dists.txt > ${label_lower}.sample.dists_mqc.tsv rm clustering_header.tmp fi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') - bioconductor-deseq2: \$(Rscript -e "library(DESeq2); cat(as.character(packageVersion('DESeq2')))") - END_VERSIONS """ stub: @@ 
-81,11 +91,5 @@ process DESEQ2_QC { do touch size_factors/\${i}.size_factors.RData done - - cat <<-END_VERSIONS >versions.yml - "${task.process}": - r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') - bioconductor-deseq2: \$(Rscript -e "library(DESeq2); cat(as.character(packageVersion('DESeq2')))") - END_VERSIONS """ } diff --git a/modules/local/rsem_merge_counts/main.nf b/modules/local/rsem_merge_counts/main.nf index 4daf6d97c..e6c2cdfec 100644 --- a/modules/local/rsem_merge_counts/main.nf +++ b/modules/local/rsem_merge_counts/main.nf @@ -1,3 +1,14 @@ +nextflow.preview.types = true + +record RsemMergedResult { + counts_gene: Path + tpm_gene: Path + counts_transcript: Path + tpm_transcript: Path + genes_long: Path + isoforms_long: Path +} + process RSEM_MERGE_COUNTS { label "process_medium" @@ -7,17 +18,19 @@ process RSEM_MERGE_COUNTS { 'nf-core/ubuntu:20.04' }" input: - path ('genes/*') - path ('isoforms/*') + genes: Path // path ('genes/*') + isoforms: Path // path ('isoforms/*') output: - path "rsem.merged.gene_counts.tsv" , emit: counts_gene - path "rsem.merged.gene_tpm.tsv" , emit: tpm_gene - path "rsem.merged.transcript_counts.tsv", emit: counts_transcript - path "rsem.merged.transcript_tpm.tsv" , emit: tpm_transcript - path "rsem.merged.genes_long.tsv" , emit: genes_long - path "rsem.merged.isoforms_long.tsv" , emit: isoforms_long - path "versions.yml" , emit: versions + record( + counts_gene: file("rsem.merged.gene_counts.tsv"), + tpm_gene: file("rsem.merged.gene_tpm.tsv"), + counts_transcript: file("rsem.merged.transcript_counts.tsv"), + tpm_transcript: file("rsem.merged.transcript_tpm.tsv"), + genes_long: file("rsem.merged.genes_long.tsv"), + isoforms_long: file("rsem.merged.isoforms_long.tsv") + ) + tuple val("${task.process}"), val('sed'), eval('echo $(sed --version 2>&1) | sed "s/^.*GNU sed) //; s/ .*$//"'), topic: versions when: task.ext.when == null || task.ext.when @@ -62,11 +75,6 @@ process RSEM_MERGE_COUNTS { 
samplename=`basename \$fileid | sed s/\\.isoforms.results\$//g` tail -n+2 \$fileid | awk -v sample=\$samplename 'BEGIN{OFS="\t"}{print sample,\$1,\$2,\$3,\$4,\$5,\$6,\$7,\$8}' >> rsem.merged.isoforms_long.tsv done - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sed: \$(echo \$(sed --version 2>&1) | sed 's/^.*GNU sed) //; s/ .*\$//') - END_VERSIONS """ stub: @@ -77,10 +85,5 @@ process RSEM_MERGE_COUNTS { touch rsem.merged.transcript_tpm.tsv touch rsem.merged.genes_long.tsv touch rsem.merged.isoforms_long.tsv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sed: \$(echo \$(sed --version 2>&1) | sed 's/^.*GNU sed) //; s/ .*\$//') - END_VERSIONS """ } diff --git a/modules/local/star_align_igenomes/main.nf b/modules/local/star_align_igenomes/main.nf index 74c323fa3..313893ac4 100644 --- a/modules/local/star_align_igenomes/main.nf +++ b/modules/local/star_align_igenomes/main.nf @@ -1,3 +1,26 @@ +nextflow.preview.types = true + +// Uses the same record type as STAR_ALIGN +record StarAlignResult { + meta: Map + bam: Path? + bam_sorted: Path? + bam_sorted_aligned: Path? + bam_transcript: Path? + bam_unsorted: Path? + log_final: Path + log_out: Path + log_progress: Path + fastq: Path? + tab: Path? + spl_junc_tab: Path? + read_per_gene_tab: Path? + junction: Path? + sam: Path? + wig: Path? + bedgraph: Path? 
+} + process STAR_ALIGN_IGENOMES { tag "$meta.id" label 'process_high' @@ -8,27 +31,39 @@ process STAR_ALIGN_IGENOMES { 'community.wave.seqera.io/library/star_samtools_gawk:79ca42311e583cdc' }" input: - tuple val(meta), path(reads, stageAs: "input*/*") - tuple val(meta2), path(index) - tuple val(meta3), path(gtf) - val star_ignore_sjdbgtf - val seq_platform - val seq_center + (meta: Map, reads: Path): Record + (meta2: Map, index: Path): Record + (meta3: Map, gtf: Path): Record + star_ignore_sjdbgtf: String? + seq_platform: String? + seq_center: String? - output: - tuple val(meta), path('*Log.final.out') , emit: log_final - tuple val(meta), path('*Log.out') , emit: log_out - tuple val(meta), path('*Log.progress.out'), emit: log_progress - path "versions.yml" , emit: versions + stage: + stageAs(reads, 'input*/*') - tuple val(meta), path('*d.out.bam') , optional:true, emit: bam - tuple val(meta), path('*sortedByCoord.out.bam') , optional:true, emit: bam_sorted - tuple val(meta), path('*toTranscriptome.out.bam'), optional:true, emit: bam_transcript - tuple val(meta), path('*Aligned.unsort.out.bam') , optional:true, emit: bam_unsorted - tuple val(meta), path('*fastq.gz') , optional:true, emit: fastq - tuple val(meta), path('*.tab') , optional:true, emit: tab - tuple val(meta), path('*.out.junction') , optional:true, emit: junction - tuple val(meta), path('*.out.sam') , optional:true, emit: sam + output: + record( + meta: meta, + bam: file('*d.out.bam', optional: true), + bam_sorted: file('*sortedByCoord.out.bam', optional: true), + bam_sorted_aligned: file("*.Aligned.sortedByCoord.out.bam", optional: true), + bam_transcript: file('*toTranscriptome.out.bam', optional: true), + bam_unsorted: file('*Aligned.unsort.out.bam', optional: true), + log_final: file('*Log.final.out'), + log_out: file('*Log.out'), + log_progress: file('*Log.progress.out'), + fastq: file('*fastq.gz', optional: true), + tab: file('*.tab', optional: true), + spl_junc_tab: file('*.SJ.out.tab',
optional: true), + read_per_gene_tab: file('*.ReadsPerGene.out.tab', optional: true), + junction: file('*.out.junction', optional: true), + sam: file('*.out.sam', optional: true), + wig: file('*.wig', optional: true), + bedgraph: file('*.bg', optional: true) + ) + tuple val("${task.process}"), val('star'), eval('STAR --version | sed -e "s/STAR_//g"'), topic: versions + tuple val("${task.process}"), val('samtools'), eval('echo $(samtools --version 2>&1) | sed "s/^.*samtools //; s/Using.*$//"'), topic: versions + tuple val("${task.process}"), val('gawk'), eval('echo $(gawk --version 2>&1) | sed "s/^.*GNU Awk //; s/, .*$//"'), topic: versions when: task.ext.when == null || task.ext.when @@ -65,13 +100,6 @@ process STAR_ALIGN_IGENOMES { mv ${prefix}.Unmapped.out.mate2 ${prefix}.unmapped_2.fastq gzip ${prefix}.unmapped_2.fastq fi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - star: \$(STAR --version | sed -e "s/STAR_//g") - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') - END_VERSIONS """ stub: @@ -94,12 +122,5 @@ process STAR_ALIGN_IGENOMES { touch ${prefix}.out.sam touch ${prefix}.Signal.UniqueMultiple.str1.out.wig touch ${prefix}.Signal.UniqueMultiple.str1.out.bg - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - star: \$(STAR --version | sed -e "s/STAR_//g") - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') - END_VERSIONS """ } diff --git a/modules/nf-core/dupradar/main.nf b/modules/nf-core/dupradar/main.nf index 8006e782f..64e021277 100644 --- a/modules/nf-core/dupradar/main.nf +++ b/modules/nf-core/dupradar/main.nf @@ -1,3 +1,16 @@ +nextflow.preview.types = true + +record DupRadarResult { + meta: Map + scatter: Path + boxplot: Path + histogram: Path + gene_data: Path + intercept: Path + multiqc: Path + 
session_info: Path +} + process DUPRADAR { tag "$meta.id" label 'process_long' @@ -8,18 +21,21 @@ process DUPRADAR { 'community.wave.seqera.io/library/bioconductor-dupradar:1.38.0--831da16eb40a64ab' }" input: - tuple val(meta), path(bam) - tuple val(meta2), path(gtf) + (meta: Map, bam: Path): Record + (meta2: Map, gtf: Path): Record output: - tuple val(meta), path("*_duprateExpDens.pdf") , emit: scatter2d - tuple val(meta), path("*_duprateExpBoxplot.pdf"), emit: boxplot - tuple val(meta), path("*_expressionHist.pdf") , emit: hist - tuple val(meta), path("*_dupMatrix.txt") , emit: dupmatrix - tuple val(meta), path("*_intercept_slope.txt") , emit: intercept_slope - tuple val(meta), path("*_mqc.txt") , emit: multiqc - tuple val(meta), path("*.R_sessionInfo.log") , emit: session_info - path "versions.yml" , emit: versions + record( + meta: meta, + scatter: file("*_duprateExpDens.pdf"), + boxplot: file("*_duprateExpBoxplot.pdf"), + histogram: file("*_expressionHist.pdf"), + gene_data: file("*_dupMatrix.txt"), + intercept: file("*_intercept_slope.txt"), + multiqc: file("*_mqc.txt"), + session_info: file("*.R_sessionInfo.log") + ) + tuple val("${task.process}"), val('bioconductor-dupradar'), eval('Rscript -e "library(dupRadar); cat(as.character(packageVersion(\'dupRadar\')))"'), topic: versions when: task.ext.when == null || task.ext.when @@ -37,10 +53,5 @@ process DUPRADAR { touch ${meta.id}_dup_intercept_mqc.txt touch ${meta.id}_duprateExpDensCurve_mqc.txt touch ${meta.id}.R_sessionInfo.log - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bioconductor-dupradar: \$(Rscript -e "library(dupRadar); cat(as.character(packageVersion('dupRadar')))") - END_VERSIONS """ } diff --git a/modules/nf-core/dupradar/templates/dupradar.r b/modules/nf-core/dupradar/templates/dupradar.r index 7653e5873..e0bbd4ee5 100755 --- a/modules/nf-core/dupradar/templates/dupradar.r +++ b/modules/nf-core/dupradar/templates/dupradar.r @@ -166,21 +166,6 @@ print(sessionInfo()) sink() 
################################################ -################################################ -## VERSIONS FILE ## -################################################ -################################################ - -r.version <- strsplit(version[['version.string']], ' ')[[1]][3] -dupradar.version <- as.character(packageVersion('dupRadar')) - -writeLines( - c( - '"${task.process}":', - paste(' bioconductor-dupradar:', dupradar.version) - ), -'versions.yml') - ################################################ ################################################ ################################################ diff --git a/modules/nf-core/kallisto/quant/main.nf b/modules/nf-core/kallisto/quant/main.nf index 8d04f2c6b..88e083cd2 100644 --- a/modules/nf-core/kallisto/quant/main.nf +++ b/modules/nf-core/kallisto/quant/main.nf @@ -1,3 +1,12 @@ +nextflow.preview.types = true + +record KallistoQuantResult { + meta: Map + results: Path + json_info: Path + log: Path +} + process KALLISTO_QUANT { tag "$meta.id" label 'process_high' @@ -8,18 +17,21 @@ process KALLISTO_QUANT { 'biocontainers/kallisto:0.51.1--heb0cbe2_0' }" input: - tuple val(meta), path(reads) - tuple val(meta2), path(index) - path gtf - path chromosomes - val fragment_length - val fragment_length_sd + (meta: Map, reads: Path): Record + (meta2: Map, index: Path): Record + gtf: Path? + chromosomes: Path? + fragment_length: String? + fragment_length_sd: String? 
output: - tuple val(meta), path("${prefix}") , emit: results - tuple val(meta), path("*.run_info.json") , emit: json_info - tuple val(meta), path("*.log") , emit: log - path "versions.yml" , emit: versions + record( + meta: meta, + results: file("${prefix}"), + json_info: file("*.run_info.json"), + log: file("*.log") + ) + tuple val("${task.process}"), val('kallisto'), eval('echo $(kallisto version) | sed "s/kallisto, version //g"'), topic: versions when: task.ext.when == null || task.ext.when @@ -61,11 +73,6 @@ process KALLISTO_QUANT { cp ${prefix}/kallisto_quant.log ${prefix}.log cp ${prefix}/run_info.json ${prefix}.run_info.json - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - kallisto: \$(echo \$(kallisto version) | sed "s/kallisto, version //g" ) - END_VERSIONS """ stub: @@ -75,10 +82,5 @@ process KALLISTO_QUANT { mkdir -p $prefix touch ${prefix}.log touch ${prefix}.run_info.json - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - kallisto: \$(echo \$(kallisto version) | sed "s/kallisto, version //g" ) - END_VERSIONS """ } diff --git a/modules/nf-core/picard/markduplicates/main.nf b/modules/nf-core/picard/markduplicates/main.nf index ad0b29636..4c9a57b2e 100644 --- a/modules/nf-core/picard/markduplicates/main.nf +++ b/modules/nf-core/picard/markduplicates/main.nf @@ -1,3 +1,13 @@ +nextflow.preview.types = true + +record PicardMarkDupResult { + meta: Map + bam: Path? + bai: Path? + cram: Path? 
+ metrics: Path +} + process PICARD_MARKDUPLICATES { tag "$meta.id" label 'process_medium' @@ -8,16 +18,19 @@ process PICARD_MARKDUPLICATES { 'biocontainers/picard:3.1.1--hdfd78af_0' }" input: - tuple val(meta), path(reads) - tuple val(meta2), path(fasta) - tuple val(meta3), path(fai) + (meta: Map, reads: Path): Record + (meta2: Map, fasta: Path): Record + (meta3: Map, fai: Path): Record output: - tuple val(meta), path("*.bam") , emit: bam, optional: true - tuple val(meta), path("*.bai") , emit: bai, optional: true - tuple val(meta), path("*.cram"), emit: cram, optional: true - tuple val(meta), path("*.metrics.txt"), emit: metrics - path "versions.yml" , emit: versions + record( + meta: meta, + bam: file("*.bam", optional: true), + bai: file("*.bai", optional: true), + cram: file("*.cram", optional: true), + metrics: file("*.metrics.txt") + ) + tuple val("${task.process}"), val('picard'), eval('echo $(picard MarkDuplicates --version 2>&1) | grep -o "Version:.*" | cut -f2- -d:'), topic: versions when: task.ext.when == null || task.ext.when @@ -45,11 +58,6 @@ process PICARD_MARKDUPLICATES { --OUTPUT ${prefix}.${suffix} \\ $reference \\ --METRICS_FILE ${prefix}.MarkDuplicates.metrics.txt - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - picard: \$(echo \$(picard MarkDuplicates --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) - END_VERSIONS """ stub: @@ -59,10 +67,5 @@ process PICARD_MARKDUPLICATES { """ touch ${prefix}.${suffix} touch ${prefix}.MarkDuplicates.metrics.txt - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - picard: \$(echo \$(picard MarkDuplicates --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) - END_VERSIONS """ } diff --git a/modules/nf-core/preseq/lcextrap/main.nf b/modules/nf-core/preseq/lcextrap/main.nf index 540a5fb27..f91148da2 100644 --- a/modules/nf-core/preseq/lcextrap/main.nf +++ b/modules/nf-core/preseq/lcextrap/main.nf @@ -1,3 +1,11 @@ +nextflow.preview.types = true + +record PreseqResult { + meta: Map + 
lc_extrap: Path + log: Path +} + process PRESEQ_LCEXTRAP { tag "$meta.id" label 'process_single' @@ -9,12 +17,15 @@ process PRESEQ_LCEXTRAP { 'biocontainers/preseq:3.2.0--hdcf5f25_6' }" input: - tuple val(meta), path(bam) + (meta: Map, bam: Path): Record output: - tuple val(meta), path("*.lc_extrap.txt"), emit: lc_extrap - tuple val(meta), path("*.log") , emit: log - path "versions.yml" , emit: versions + record( + meta: meta, + lc_extrap: file("*.lc_extrap.txt"), + log: file("*.log") + ) + tuple val("${task.process}"), val('preseq'), eval('echo $(preseq 2>&1) | sed "s/^.*Version: //; s/Usage.*$//"'), topic: versions when: task.ext.when == null || task.ext.when @@ -32,11 +43,6 @@ process PRESEQ_LCEXTRAP { -output ${prefix}.lc_extrap.txt \\ $bam cp .command.err ${prefix}.command.log - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - preseq: \$(echo \$(preseq 2>&1) | sed 's/^.*Version: //; s/Usage:.*\$//') - END_VERSIONS """ stub: @@ -44,10 +50,5 @@ process PRESEQ_LCEXTRAP { """ touch ${prefix}.lc_extrap.txt touch ${prefix}.command.log - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - preseq: \$(echo \$(preseq 2>&1) | sed 's/^.*Version: //; s/Usage:.*\$//') - END_VERSIONS """ } diff --git a/modules/nf-core/rsem/calculateexpression/main.nf b/modules/nf-core/rsem/calculateexpression/main.nf index 1962e4488..2aaed5987 100644 --- a/modules/nf-core/rsem/calculateexpression/main.nf +++ b/modules/nf-core/rsem/calculateexpression/main.nf @@ -1,3 +1,16 @@ +nextflow.preview.types = true + +record RsemCalcResult { + meta: Map + counts_gene: Path + counts_transcript: Path + stat: Path + logs: Path? + bam_star: Path? + bam_genome: Path? + bam_transcript: Path? 
+} + process RSEM_CALCULATEEXPRESSION { tag "$meta.id" label 'process_high' @@ -8,19 +21,22 @@ process RSEM_CALCULATEEXPRESSION { 'community.wave.seqera.io/library/rsem_star:5acb4e8c03239c32' }" input: - tuple val(meta), path(reads) // FASTQ files or BAM file for --alignments mode - path index + (meta: Map, reads: Path): Record // FASTQ files or BAM file for --alignments mode + index: Path output: - tuple val(meta), path("*.genes.results") , emit: counts_gene - tuple val(meta), path("*.isoforms.results"), emit: counts_transcript - tuple val(meta), path("*.stat") , emit: stat - tuple val(meta), path("*.log") , emit: logs, optional:true - path "versions.yml" , emit: versions - - tuple val(meta), path("*.STAR.genome.bam") , optional:true, emit: bam_star - tuple val(meta), path("${prefix}.genome.bam") , optional:true, emit: bam_genome - tuple val(meta), path("${prefix}.transcript.bam"), optional:true, emit: bam_transcript + record( + meta: meta, + counts_gene: file("*.genes.results"), + counts_transcript: file("*.isoforms.results"), + stat: file("*.stat"), + logs: file("*.log", optional: true), + bam_star: file("*.STAR.genome.bam", optional: true), + bam_genome: file("${prefix}.genome.bam", optional: true), + bam_transcript: file("${prefix}.transcript.bam", optional: true) + ) + tuple val("${task.process}"), val('rsem'), eval('rsem-calculate-expression --version | sed -e "s/Current version: RSEM v//g"'), topic: versions + tuple val("${task.process}"), val('star'), eval('STAR --version | sed -e "s/STAR_//g"'), topic: versions when: task.ext.when == null || task.ext.when @@ -67,12 +83,6 @@ process RSEM_CALCULATEEXPRESSION { $reads \\ \$INDEX \\ $prefix - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - rsem: \$(rsem-calculate-expression --version | sed -e "s/Current version: RSEM v//g") - star: \$(STAR --version | sed -e "s/STAR_//g") - END_VERSIONS """ stub: @@ -91,11 +101,5 @@ process RSEM_CALCULATEEXPRESSION { touch ${prefix}.genome.bam touch 
${prefix}.transcript.bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - rsem: \$(rsem-calculate-expression --version | sed -e "s/Current version: RSEM v//g") - star: \$(STAR --version | sed -e "s/STAR_//g") - END_VERSIONS """ } diff --git a/modules/nf-core/rseqc/bamstat/main.nf b/modules/nf-core/rseqc/bamstat/main.nf index 55ce0c9b5..e97d8ec1c 100644 --- a/modules/nf-core/rseqc/bamstat/main.nf +++ b/modules/nf-core/rseqc/bamstat/main.nf @@ -1,3 +1,10 @@ +nextflow.preview.types = true + +record BamStatResult { + meta: Map + txt: Path +} + process RSEQC_BAMSTAT { tag "$meta.id" label 'process_medium' @@ -8,11 +15,11 @@ process RSEQC_BAMSTAT { 'community.wave.seqera.io/library/rseqc_r-base:2e29d2dfda9cef15' }" input: - tuple val(meta), path(bam), path(bai) + (meta: Map, bam: Path, bai: Path): Record output: - tuple val(meta), path("*.bam_stat.txt"), emit: txt - tuple val("${task.process}"), val('rseqc'), eval('bam_stat.py --version | sed "s/bam_stat.py //"'), emit: versions_rseqc, topic: versions + record(meta: meta, txt: file("*.bam_stat.txt")) + tuple val("${task.process}"), val('rseqc'), eval('bam_stat.py --version | sed "s/bam_stat.py //"'), topic: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/rseqc/inferexperiment/main.nf b/modules/nf-core/rseqc/inferexperiment/main.nf index eeae81d0a..a13908abc 100644 --- a/modules/nf-core/rseqc/inferexperiment/main.nf +++ b/modules/nf-core/rseqc/inferexperiment/main.nf @@ -1,3 +1,10 @@ +nextflow.preview.types = true + +record InferExperimentResult { + meta: Map + txt: Path +} + process RSEQC_INFEREXPERIMENT { tag "$meta.id" label 'process_medium' @@ -8,12 +15,12 @@ process RSEQC_INFEREXPERIMENT { 'community.wave.seqera.io/library/rseqc_r-base:2e29d2dfda9cef15' }" input: - tuple val(meta), path(bam), path(bai) - path bed + (meta: Map, bam: Path, bai: Path): Record + bed: Path output: - tuple val(meta), path("*.infer_experiment.txt"), emit: txt - tuple val("${task.process}"), 
val('rseqc'), eval('infer_experiment.py --version | sed "s/infer_experiment.py //"'), emit: versions_rseqc, topic: versions + record(meta: meta, txt: file("*.infer_experiment.txt")) + tuple val("${task.process}"), val('rseqc'), eval('infer_experiment.py --version | sed "s/infer_experiment.py //"'), topic: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/rseqc/innerdistance/main.nf b/modules/nf-core/rseqc/innerdistance/main.nf index 7eddb2905..048bc818a 100644 --- a/modules/nf-core/rseqc/innerdistance/main.nf +++ b/modules/nf-core/rseqc/innerdistance/main.nf @@ -1,3 +1,14 @@ +nextflow.preview.types = true + +record InnerDistanceResult { + meta: Map + distance: Path? + freq: Path? + mean: Path? + pdf: Path? + rscript: Path? +} + process RSEQC_INNERDISTANCE { tag "$meta.id" label 'process_medium' @@ -8,16 +19,19 @@ process RSEQC_INNERDISTANCE { 'community.wave.seqera.io/library/rseqc_r-base:2e29d2dfda9cef15' }" input: - tuple val(meta), path(bam), path(bai) - path bed + (meta: Map, bam: Path, bai: Path): Record + bed: Path output: - tuple val(meta), path("*distance.txt"), optional:true, emit: distance - tuple val(meta), path("*freq.txt") , optional:true, emit: freq - tuple val(meta), path("*mean.txt") , optional:true, emit: mean - tuple val(meta), path("*.pdf") , optional:true, emit: pdf - tuple val(meta), path("*.r") , optional:true, emit: rscript - tuple val("${task.process}"), val('rseqc'), eval('inner_distance.py --version | sed "s/inner_distance.py //"'), emit: versions_rseqc, topic: versions + record( + meta: meta, + distance: file("*distance.txt", optional: true), + freq: file("*freq.txt", optional: true), + mean: file("*mean.txt", optional: true), + pdf: file("*.pdf", optional: true), + rscript: file("*.r", optional: true) + ) + tuple val("${task.process}"), val('rseqc'), eval('inner_distance.py --version | sed "s/inner_distance.py //"'), topic: versions when: task.ext.when == null || task.ext.when diff --git 
a/modules/nf-core/rseqc/junctionannotation/main.nf b/modules/nf-core/rseqc/junctionannotation/main.nf index b1748c2df..c2fabbd77 100644 --- a/modules/nf-core/rseqc/junctionannotation/main.nf +++ b/modules/nf-core/rseqc/junctionannotation/main.nf @@ -1,3 +1,16 @@ +nextflow.preview.types = true + +record JunctionAnnotationResult { + meta: Map + bed: Path? + interact_bed: Path? + xls: Path + log: Path + pdf: Path? + events_pdf: Path? + rscript: Path +} + process RSEQC_JUNCTIONANNOTATION { tag "$meta.id" label 'process_medium' @@ -8,18 +21,21 @@ process RSEQC_JUNCTIONANNOTATION { 'community.wave.seqera.io/library/rseqc_r-base:2e29d2dfda9cef15' }" input: - tuple val(meta), path(bam), path(bai) - path bed + (meta: Map, bam: Path, bai: Path): Record + bed: Path output: - tuple val(meta), path("*.xls") , emit: xls - tuple val(meta), path("*.r") , emit: rscript - tuple val(meta), path("*.log") , emit: log - tuple val(meta), path("*.junction.bed"), optional:true, emit: bed - tuple val(meta), path("*.Interact.bed"), optional:true, emit: interact_bed - tuple val(meta), path("*junction.pdf") , optional:true, emit: pdf - tuple val(meta), path("*events.pdf") , optional:true, emit: events_pdf - tuple val("${task.process}"), val('rseqc'), eval('junction_annotation.py --version | sed "s/junction_annotation.py //"'), emit: versions_rseqc, topic: versions + record( + meta: meta, + bed: file("*.junction.bed", optional: true), + interact_bed: file("*.Interact.bed", optional: true), + xls: file("*.xls"), + log: file("*.log"), + pdf: file("*junction.pdf", optional: true), + events_pdf: file("*events.pdf", optional: true), + rscript: file("*.r") + ) + tuple val("${task.process}"), val('rseqc'), eval('junction_annotation.py --version | sed "s/junction_annotation.py //"'), topic: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/rseqc/junctionsaturation/main.nf b/modules/nf-core/rseqc/junctionsaturation/main.nf index 08b85ff1c..6fec703dc 100644 --- 
a/modules/nf-core/rseqc/junctionsaturation/main.nf +++ b/modules/nf-core/rseqc/junctionsaturation/main.nf @@ -1,3 +1,11 @@ +nextflow.preview.types = true + +record JunctionSaturationResult { + meta: Map + pdf: Path + rscript: Path +} + process RSEQC_JUNCTIONSATURATION { tag "$meta.id" label 'process_medium' @@ -8,13 +16,16 @@ process RSEQC_JUNCTIONSATURATION { 'community.wave.seqera.io/library/rseqc_r-base:2e29d2dfda9cef15' }" input: - tuple val(meta), path(bam), path(bai) - path bed + (meta: Map, bam: Path, bai: Path): Record + bed: Path output: - tuple val(meta), path("*.pdf"), emit: pdf - tuple val(meta), path("*.r") , emit: rscript - tuple val("${task.process}"), val('rseqc'), eval('junction_saturation.py --version | sed "s/junction_saturation.py //"'), emit: versions_rseqc, topic: versions + record( + meta: meta, + pdf: file("*.pdf"), + rscript: file("*.r") + ) + tuple val("${task.process}"), val('rseqc'), eval('junction_saturation.py --version | sed "s/junction_saturation.py //"'), topic: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/rseqc/readdistribution/main.nf b/modules/nf-core/rseqc/readdistribution/main.nf index e055485b1..a01237530 100644 --- a/modules/nf-core/rseqc/readdistribution/main.nf +++ b/modules/nf-core/rseqc/readdistribution/main.nf @@ -1,3 +1,10 @@ +nextflow.preview.types = true + +record ReadDistributionResult { + meta: Map + txt: Path +} + process RSEQC_READDISTRIBUTION { tag "$meta.id" label 'process_medium' @@ -8,12 +15,12 @@ process RSEQC_READDISTRIBUTION { 'community.wave.seqera.io/library/rseqc_r-base:2e29d2dfda9cef15' }" input: - tuple val(meta), path(bam), path(bai) - path bed + (meta: Map, bam: Path, bai: Path): Record + bed: Path output: - tuple val(meta), path("*.read_distribution.txt"), emit: txt - tuple val("${task.process}"), val('rseqc'), eval('read_distribution.py --version | sed "s/read_distribution.py //"'), emit: versions_rseqc, topic: versions + record(meta: meta, txt: 
file("*.read_distribution.txt")) + tuple val("${task.process}"), val('rseqc'), eval('read_distribution.py --version | sed "s/read_distribution.py //"'), topic: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/rseqc/readduplication/main.nf b/modules/nf-core/rseqc/readduplication/main.nf index 442ebc90c..3a7054aef 100644 --- a/modules/nf-core/rseqc/readduplication/main.nf +++ b/modules/nf-core/rseqc/readduplication/main.nf @@ -1,3 +1,13 @@ +nextflow.preview.types = true + +record ReadDuplicationResult { + meta: Map + seq_xls: Path + pos_xls: Path + pdf: Path + rscript: Path +} + process RSEQC_READDUPLICATION { tag "$meta.id" label 'process_medium' @@ -8,14 +18,17 @@ process RSEQC_READDUPLICATION { 'community.wave.seqera.io/library/rseqc_r-base:2e29d2dfda9cef15' }" input: - tuple val(meta), path(bam), path(bai) + (meta: Map, bam: Path, bai: Path): Record output: - tuple val(meta), path("*seq.DupRate.xls"), emit: seq_xls - tuple val(meta), path("*pos.DupRate.xls"), emit: pos_xls - tuple val(meta), path("*.pdf") , emit: pdf - tuple val(meta), path("*.r") , emit: rscript - tuple val("${task.process}"), val('rseqc'), eval('read_duplication.py --version | sed "s/read_duplication.py //"'), emit: versions_rseqc, topic: versions + record( + meta: meta, + seq_xls: file("*seq.DupRate.xls"), + pos_xls: file("*pos.DupRate.xls"), + pdf: file("*.pdf"), + rscript: file("*.r") + ) + tuple val("${task.process}"), val('rseqc'), eval('read_duplication.py --version | sed "s/read_duplication.py //"'), topic: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/rseqc/tin/main.nf b/modules/nf-core/rseqc/tin/main.nf index e7cfda0bb..ec75295b1 100644 --- a/modules/nf-core/rseqc/tin/main.nf +++ b/modules/nf-core/rseqc/tin/main.nf @@ -1,3 +1,11 @@ +nextflow.preview.types = true + +record TinResult { + meta: Map + txt: Path + xls: Path +} + process RSEQC_TIN { tag "$meta.id" label 'process_high' @@ -8,13 +16,12 @@ process RSEQC_TIN 
{ 'community.wave.seqera.io/library/rseqc_r-base:2e29d2dfda9cef15' }" input: - tuple val(meta), path(bam), path(bai) - path bed + (meta: Map, bam: Path, bai: Path): Record + bed: Path output: - tuple val(meta), path("*.txt"), emit: txt - tuple val(meta), path("*.xls"), emit: xls - tuple val("${task.process}"), val('rseqc'), eval('tin.py --version | sed "s/tin.py //"'), emit: versions_rseqc, topic: versions + record(meta: meta, txt: file("*.txt"), xls: file("*.xls")) + tuple val("${task.process}"), val('rseqc'), eval('tin.py --version | sed "s/tin.py //"'), topic: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf index a77ad8214..5a6cbc716 100644 --- a/modules/nf-core/samtools/index/main.nf +++ b/modules/nf-core/samtools/index/main.nf @@ -1,3 +1,12 @@ +nextflow.preview.types = true + +record SamtoolsIndexResult { + meta: Map + bai: Path? + csi: Path? + crai: Path? +} + process SAMTOOLS_INDEX { tag "$meta.id" label 'process_low' @@ -8,13 +17,16 @@ process SAMTOOLS_INDEX { 'biocontainers/samtools:1.22.1--h96c455f_0' }" input: - tuple val(meta), path(input) + (meta: Map, input: Path): Record output: - tuple val(meta), path("*.bai") , optional:true, emit: bai - tuple val(meta), path("*.csi") , optional:true, emit: csi - tuple val(meta), path("*.crai"), optional:true, emit: crai - path "versions.yml" , emit: versions + record( + meta: meta, + bai: file("*.bai", optional: true), + csi: file("*.csi", optional: true), + crai: file("*.crai", optional: true) + ) + tuple val("${task.process}"), val('samtools'), eval('echo $(samtools --version 2>&1) | sed "s/^.*samtools //; s/Using.*$//"'), topic: versions when: task.ext.when == null || task.ext.when @@ -27,11 +39,6 @@ process SAMTOOLS_INDEX { -@ ${task.cpus} \\ $args \\ $input - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - 
END_VERSIONS """ stub: @@ -40,10 +47,5 @@ process SAMTOOLS_INDEX { "crai" : args.contains("-c") ? "csi" : "bai" """ touch ${input}.${extension} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS """ } diff --git a/modules/nf-core/samtools/sort/main.nf b/modules/nf-core/samtools/sort/main.nf index 6b5aa31dd..79ee5085f 100644 --- a/modules/nf-core/samtools/sort/main.nf +++ b/modules/nf-core/samtools/sort/main.nf @@ -1,3 +1,15 @@ +nextflow.preview.types = true + +record SamtoolsSortResult { + meta: Map + bam: Path? + cram: Path? + sam: Path? + crai: Path? + csi: Path? + bai: Path? +} + process SAMTOOLS_SORT { tag "$meta.id" label 'process_medium' @@ -8,18 +20,21 @@ process SAMTOOLS_SORT { 'biocontainers/samtools:1.22.1--h96c455f_0' }" input: - tuple val(meta) , path(bam) - tuple val(meta2), path(fasta) - val index_format + (meta: Map, bam: Path): Record + (meta2: Map, fasta: Path): Record + index_format: String? 
output: - tuple val(meta), path("${prefix}.bam"), emit: bam, optional: true - tuple val(meta), path("${prefix}.cram"), emit: cram, optional: true - tuple val(meta), path("${prefix}.sam"), emit: sam, optional: true - tuple val(meta), path("${prefix}.${extension}.crai"), emit: crai, optional: true - tuple val(meta), path("${prefix}.${extension}.csi"), emit: csi, optional: true - tuple val(meta), path("${prefix}.${extension}.bai"), emit: bai, optional: true - tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), topic: versions, emit: versions_samtools + record( + meta: meta, + bam: file("${prefix}.bam", optional: true), + cram: file("${prefix}.cram", optional: true), + sam: file("${prefix}.sam", optional: true), + crai: file("${prefix}.${extension}.crai", optional: true), + csi: file("${prefix}.${extension}.csi", optional: true), + bai: file("${prefix}.${extension}.bai", optional: true) + ) + tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), topic: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/sentieon/rsemcalculateexpression/main.nf b/modules/nf-core/sentieon/rsemcalculateexpression/main.nf index b2efb5dc2..e09b98924 100644 --- a/modules/nf-core/sentieon/rsemcalculateexpression/main.nf +++ b/modules/nf-core/sentieon/rsemcalculateexpression/main.nf @@ -1,3 +1,16 @@ +nextflow.preview.types = true + +record RsemCalcResult { + meta: Map + counts_gene: Path + counts_transcript: Path + stat: Path + logs: Path? + bam_star: Path? + bam_genome: Path? + bam_transcript: Path? 
+} + process SENTIEON_RSEMCALCULATEEXPRESSION { tag "$meta.id" label 'process_high' @@ -9,22 +22,23 @@ process SENTIEON_RSEMCALCULATEEXPRESSION { 'community.wave.seqera.io/library/rsem_sentieon:1d3ad86b89bf5cc7' }" input: - tuple val(meta), path(reads) // FASTQ files or BAM file for --alignments mode - path index + (meta: Map, reads: Path): Record // FASTQ files or BAM file for --alignments mode + index: Path output: - tuple val(meta), path("*.genes.results") , emit: counts_gene - tuple val(meta), path("*.isoforms.results"), emit: counts_transcript - tuple val(meta), path("*.stat") , emit: stat - tuple val(meta), path("*.log") , emit: logs, optional:true - - tuple val(meta), path("*.STAR.genome.bam") , optional:true, emit: bam_star - tuple val(meta), path("${prefix}.genome.bam") , optional:true, emit: bam_genome - tuple val(meta), path("${prefix}.transcript.bam"), optional:true, emit: bam_transcript - - tuple val("${task.process}"), val('rsem'), eval('rsem-calculate-expression --version | sed -e "s/Current version: RSEM v//g"'), topic: versions, emit: versions_rsem - tuple val("${task.process}"), val('star'), eval('STAR --version | sed -e "s/STAR_//g"'), topic: versions, emit: versions_star - tuple val("${task.process}"), val('sentieon'), eval('sentieon driver --version 2>&1 | sed -e "s/sentieon-genomics-//g"'), topic: versions, emit: versions_sentieon + record( + meta: meta, + counts_gene: file("*.genes.results"), + counts_transcript: file("*.isoforms.results"), + stat: file("*.stat"), + logs: file("*.log", optional: true), + bam_star: file("*.STAR.genome.bam", optional: true), + bam_genome: file("${prefix}.genome.bam", optional: true), + bam_transcript: file("${prefix}.transcript.bam", optional: true) + ) + tuple val("${task.process}"), val('rsem'), eval('rsem-calculate-expression --version | sed -e "s/Current version: RSEM v//g"'), topic: versions + tuple val("${task.process}"), val('star'), eval('STAR --version | sed -e "s/STAR_//g"'), topic: versions + tuple 
val("${task.process}"), val('sentieon'), eval('sentieon driver --version 2>&1 | sed -e "s/sentieon-genomics-//g"'), topic: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/sentieon/staralign/main.nf b/modules/nf-core/sentieon/staralign/main.nf index 3eab1b62a..474ca675d 100644 --- a/modules/nf-core/sentieon/staralign/main.nf +++ b/modules/nf-core/sentieon/staralign/main.nf @@ -1,3 +1,26 @@ +nextflow.preview.types = true + +// Uses the same record type as STAR_ALIGN +record StarAlignResult { + meta: Map + bam: Path? + bam_sorted: Path? + bam_sorted_aligned: Path? + bam_transcript: Path? + bam_unsorted: Path? + log_final: Path + log_out: Path + log_progress: Path + fastq: Path? + tab: Path? + spl_junc_tab: Path? + read_per_gene_tab: Path? + junction: Path? + sam: Path? + wig: Path? + bedgraph: Path? +} + process SENTIEON_STARALIGN { tag "${meta.id}" label 'process_high' @@ -10,32 +33,38 @@ process SENTIEON_STARALIGN { : 'community.wave.seqera.io/library/sentieon:202503.01--1863def31ed8e4d5'}" input: - tuple val(meta), path(reads, stageAs: "input*/*") - tuple val(meta2), path(index) - tuple val(meta3), path(gtf) - val star_ignore_sjdbgtf - val seq_platform - val seq_center + (meta: Map, reads: Path): Record + (meta2: Map, index: Path): Record + + stage: + stageAs(reads, 'input*/*') + (meta3: Map, gtf: Path): Record + star_ignore_sjdbgtf: String? + seq_platform: String? + seq_center: String? 
output: - tuple val(meta), path('*Log.final.out'), emit: log_final - tuple val(meta), path('*Log.out'), emit: log_out - tuple val(meta), path('*Log.progress.out'), emit: log_progress - tuple val(meta), path('*d.out.bam'), emit: bam, optional: true - tuple val(meta), path("${prefix}.sortedByCoord.out.bam"), emit: bam_sorted, optional: true - tuple val(meta), path("${prefix}.Aligned.sortedByCoord.out.bam"), emit: bam_sorted_aligned, optional: true - tuple val(meta), path('*toTranscriptome.out.bam'), emit: bam_transcript, optional: true - tuple val(meta), path('*Aligned.unsort.out.bam'), emit: bam_unsorted, optional: true - tuple val(meta), path('*fastq.gz'), emit: fastq, optional: true - tuple val(meta), path('*.tab'), emit: tab, optional: true - tuple val(meta), path('*.SJ.out.tab'), emit: spl_junc_tab, optional: true - tuple val(meta), path('*.ReadsPerGene.out.tab'), emit: read_per_gene_tab, optional: true - tuple val(meta), path('*.out.junction'), emit: junction, optional: true - tuple val(meta), path('*.out.sam'), emit: sam, optional: true - tuple val(meta), path('*.wig'), emit: wig, optional: true - tuple val(meta), path('*.bg'), emit: bedgraph, optional: true - tuple val("${task.process}"), val('star'), eval('sentieon STAR --version | sed -e "s/STAR_//g"'), topic: versions, emit: versions_star - tuple val("${task.process}"), val('sentieon'), eval('sentieon driver --version 2>&1 | sed -e "s/sentieon-genomics-//g"'), topic: versions, emit: versions_sentieon + record( + meta: meta, + bam: file('*d.out.bam', optional: true), + bam_sorted: file("${prefix}.sortedByCoord.out.bam", optional: true), + bam_sorted_aligned: file("${prefix}.Aligned.sortedByCoord.out.bam", optional: true), + bam_transcript: file('*toTranscriptome.out.bam', optional: true), + bam_unsorted: file('*Aligned.unsort.out.bam', optional: true), + log_final: file('*Log.final.out'), + log_out: file('*Log.out'), + log_progress: file('*Log.progress.out'), + fastq: file('*fastq.gz', optional: true), + 
tab: file('*.tab', optional: true), + spl_junc_tab: file('*.SJ.out.tab', optional: true), + read_per_gene_tab: file('*.ReadsPerGene.out.tab', optional: true), + junction: file('*.out.junction', optional: true), + sam: file('*.out.sam', optional: true), + wig: file('*.wig', optional: true), + bedgraph: file('*.bg', optional: true) + ) + tuple val("${task.process}"), val('star'), eval('sentieon STAR --version | sed -e "s/STAR_//g"'), topic: versions + tuple val("${task.process}"), val('sentieon'), eval('sentieon driver --version 2>&1 | sed -e "s/sentieon-genomics-//g"'), topic: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/star/align/main.nf b/modules/nf-core/star/align/main.nf index bf04e66f0..91e67279e 100644 --- a/modules/nf-core/star/align/main.nf +++ b/modules/nf-core/star/align/main.nf @@ -1,3 +1,25 @@ +nextflow.preview.types = true + +record StarAlignResult { + meta: Map + bam: Path? + bam_sorted: Path? + bam_sorted_aligned: Path? + bam_transcript: Path? + bam_unsorted: Path? + log_final: Path + log_out: Path + log_progress: Path + fastq: Path? + tab: Path? + spl_junc_tab: Path? + read_per_gene_tab: Path? + junction: Path? + sam: Path? + wig: Path? + bedgraph: Path? +} + process STAR_ALIGN { tag "$meta.id" label 'process_high' @@ -8,32 +30,39 @@ process STAR_ALIGN { 'community.wave.seqera.io/library/htslib_samtools_star_gawk:ae438e9a604351a4' }" input: - tuple val(meta), path(reads, stageAs: "input*/*") - tuple val(meta2), path(index) - tuple val(meta3), path(gtf) - val star_ignore_sjdbgtf - val seq_platform - val seq_center + (meta: Map, reads: Path): Record + (meta2: Map, index: Path): Record - output: - tuple val(meta), path('*Log.final.out') , emit: log_final - tuple val(meta), path('*Log.out') , emit: log_out - tuple val(meta), path('*Log.progress.out'), emit: log_progress - path "versions.yml" , emit: versions + stage: + stageAs(reads, 'input*/*') + (meta3: Map, gtf: Path): Record + star_ignore_sjdbgtf: String? 
+ seq_platform: String? + seq_center: String? - tuple val(meta), path('*d.out.bam') , optional:true, emit: bam - tuple val(meta), path("${prefix}.sortedByCoord.out.bam") , optional:true, emit: bam_sorted - tuple val(meta), path("${prefix}.Aligned.sortedByCoord.out.bam") , optional:true, emit: bam_sorted_aligned - tuple val(meta), path('*toTranscriptome.out.bam') , optional:true, emit: bam_transcript - tuple val(meta), path('*Aligned.unsort.out.bam') , optional:true, emit: bam_unsorted - tuple val(meta), path('*fastq.gz') , optional:true, emit: fastq - tuple val(meta), path('*.tab') , optional:true, emit: tab - tuple val(meta), path('*.SJ.out.tab') , optional:true, emit: spl_junc_tab - tuple val(meta), path('*.ReadsPerGene.out.tab') , optional:true, emit: read_per_gene_tab - tuple val(meta), path('*.out.junction') , optional:true, emit: junction - tuple val(meta), path('*.out.sam') , optional:true, emit: sam - tuple val(meta), path('*.wig') , optional:true, emit: wig - tuple val(meta), path('*.bg') , optional:true, emit: bedgraph + output: + record( + meta: meta, + bam: file('*d.out.bam', optional: true), + bam_sorted: file("${prefix}.sortedByCoord.out.bam", optional: true), + bam_sorted_aligned: file("${prefix}.Aligned.sortedByCoord.out.bam", optional: true), + bam_transcript: file('*toTranscriptome.out.bam', optional: true), + bam_unsorted: file('*Aligned.unsort.out.bam', optional: true), + log_final: file('*Log.final.out'), + log_out: file('*Log.out'), + log_progress: file('*Log.progress.out'), + fastq: file('*fastq.gz', optional: true), + tab: file('*.tab', optional: true), + spl_junc_tab: file('*.SJ.out.tab', optional: true), + read_per_gene_tab: file('*.ReadsPerGene.out.tab', optional: true), + junction: file('*.out.junction', optional: true), + sam: file('*.out.sam', optional: true), + wig: file('*.wig', optional: true), + bedgraph: file('*.bg', optional: true) + ) + tuple val("${task.process}"), val('star'), eval('STAR --version | sed -e "s/STAR_//g"'), 
topic: versions + tuple val("${task.process}"), val('samtools'), eval('echo $(samtools --version 2>&1) | sed "s/^.*samtools //; s/Using.*$//"'), topic: versions + tuple val("${task.process}"), val('gawk'), eval('echo $(gawk --version 2>&1) | sed "s/^.*GNU Awk //; s/, .*$//"'), topic: versions when: task.ext.when == null || task.ext.when @@ -72,12 +101,6 @@ process STAR_ALIGN { gzip ${prefix}.unmapped_2.fastq fi - cat <<-END_VERSIONS > versions.yml - "${task.process}": - star: \$(STAR --version | sed -e "s/STAR_//g") - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') - END_VERSIONS """ stub: @@ -100,12 +123,5 @@ process STAR_ALIGN { touch ${prefix}.out.sam touch ${prefix}.Signal.UniqueMultiple.str1.out.wig touch ${prefix}.Signal.UniqueMultiple.str1.out.bg - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - star: \$(STAR --version | sed -e "s/STAR_//g") - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') - END_VERSIONS """ } diff --git a/modules/nf-core/tximeta/tximport/main.nf b/modules/nf-core/tximeta/tximport/main.nf index b0cce8536..a176f46b7 100644 --- a/modules/nf-core/tximeta/tximport/main.nf +++ b/modules/nf-core/tximeta/tximport/main.nf @@ -1,3 +1,17 @@ +nextflow.preview.types = true + +record TximportResult { + meta: Map + tpm_gene: Path + counts_gene: Path + counts_gene_length_scaled: Path + counts_gene_scaled: Path + lengths_gene: Path + tpm_transcript: Path + counts_transcript: Path + lengths_transcript: Path +} + process TXIMETA_TXIMPORT { label "process_medium" @@ -7,20 +21,26 @@ process TXIMETA_TXIMPORT { 'biocontainers/bioconductor-tximeta:1.20.1--r43hdfd78af_0' }" input: - tuple val(meta), path("quants/*") - tuple val(meta2), path(tx2gene) - val quant_type + (meta: Map, quants: Path): Record + (meta2: Map, tx2gene: 
Path): Record + + stage: + stageAs(quants, 'quants/*') + quant_type: String output: - tuple val(meta), path("*gene_tpm.tsv") , emit: tpm_gene - tuple val(meta), path("*gene_counts.tsv") , emit: counts_gene - tuple val(meta), path("*gene_counts_length_scaled.tsv"), emit: counts_gene_length_scaled - tuple val(meta), path("*gene_counts_scaled.tsv") , emit: counts_gene_scaled - tuple val(meta), path("*gene_lengths.tsv") , emit: lengths_gene - tuple val(meta), path("*transcript_tpm.tsv") , emit: tpm_transcript - tuple val(meta), path("*transcript_counts.tsv") , emit: counts_transcript - tuple val(meta), path("*transcript_lengths.tsv") , emit: lengths_transcript - path "versions.yml" , emit: versions + record( + meta: meta, + tpm_gene: file("*gene_tpm.tsv"), + counts_gene: file("*gene_counts.tsv"), + counts_gene_length_scaled: file("*gene_counts_length_scaled.tsv"), + counts_gene_scaled: file("*gene_counts_scaled.tsv"), + lengths_gene: file("*gene_lengths.tsv"), + tpm_transcript: file("*transcript_tpm.tsv"), + counts_transcript: file("*transcript_counts.tsv"), + lengths_transcript: file("*transcript_lengths.tsv") + ) + tuple val("${task.process}"), val('bioconductor-tximeta'), eval('Rscript -e "library(tximeta); cat(as.character(packageVersion(\'tximeta\')))"'), topic: versions when: task.ext.when == null || task.ext.when @@ -38,10 +58,5 @@ process TXIMETA_TXIMPORT { touch ${meta.id}.transcript_tpm.tsv touch ${meta.id}.transcript_counts.tsv touch ${meta.id}.transcript_lengths.tsv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bioconductor-tximeta: \$(Rscript -e "library(tximeta); cat(as.character(packageVersion('tximeta')))") - END_VERSIONS """ } diff --git a/modules/nf-core/tximeta/tximport/templates/tximport.r b/modules/nf-core/tximeta/tximport/templates/tximport.r index 0ff05ef04..55f753ba7 100755 --- a/modules/nf-core/tximeta/tximport/templates/tximport.r +++ b/modules/nf-core/tximeta/tximport/templates/tximport.r @@ -242,18 +242,6 @@ sink() 
################################################ ## VERSIONS FILE ## ################################################ -################################################ - -r.version <- strsplit(version[['version.string']], ' ')[[1]][3] -tximeta.version <- as.character(packageVersion('tximeta')) - -writeLines( - c( - '"${task.process}":', - paste(' bioconductor-tximeta:', tximeta.version) - ), -'versions.yml') - ################################################ ################################################ ################################################ diff --git a/modules/nf-core/umitools/prepareforrsem/main.nf b/modules/nf-core/umitools/prepareforrsem/main.nf index 18772d8a4..d57f9b748 100644 --- a/modules/nf-core/umitools/prepareforrsem/main.nf +++ b/modules/nf-core/umitools/prepareforrsem/main.nf @@ -1,3 +1,11 @@ +nextflow.preview.types = true + +record PrepareForRsemResult { + meta: Map + bam: Path + log: Path +} + process UMITOOLS_PREPAREFORRSEM { tag "$meta.id" label 'process_medium' @@ -8,12 +16,15 @@ process UMITOOLS_PREPAREFORRSEM { 'biocontainers/umi_tools:1.1.6--py311haab0aaa_0' }" input: - tuple val(meta), path(bam), path(bai) + (meta: Map, bam: Path, bai: Path): Record output: - tuple val(meta), path('*.bam'), emit: bam - tuple val(meta), path('*.log'), emit: log - path "versions.yml" , emit: versions + record( + meta: meta, + bam: file('*.bam'), + log: file('*.log') + ) + tuple val("${task.process}"), val('umi_tools'), eval('umi_tools --version | sed "/version:/!d; s/.*: //"'), topic: versions when: task.ext.when == null || task.ext.when @@ -28,21 +39,11 @@ process UMITOOLS_PREPAREFORRSEM { --stdout=${prefix}.bam \\ --log=${prefix}.prepare_for_rsem.log \\ $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - umitools: \$( umi_tools --version | sed '/version:/!d; s/.*: //' ) - END_VERSIONS """ stub: """ touch ${meta.id}.bam touch ${meta.id}.log - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - umitools: \$( umi_tools 
--version | sed '/version:/!d; s/.*: //' ) - END_VERSIONS """ } diff --git a/subworkflows/local/align_hisat2/main.nf b/subworkflows/local/align_hisat2/main.nf index e60f78419..6d2bac366 100644 --- a/subworkflows/local/align_hisat2/main.nf +++ b/subworkflows/local/align_hisat2/main.nf @@ -18,15 +18,12 @@ workflow ALIGN_HISAT2 { skip_markduplicates // boolean: skip marking duplicates transcriptome_bam // channel: [ val(meta), path(bam) ] - for UMI dedup transcript_fasta // channel: [ val(meta), [ fasta ] ] - for UMI dedup - input_genome_bam // channel: [ val(meta), path(bam) ] - pre-existing genome BAMs to mix (when not using UMI) - input_genome_bam_index // channel: [ val(meta), path(bai) ] - pre-existing genome BAM indices + input_genome_bam // channel: [ val(meta), path(bam) ] + input_genome_bam_index // channel: [ val(meta), path(bai) ] main: ch_versions = channel.empty() - // - // SUBWORKFLOW: Align reads with HISAT2 - // FASTQ_ALIGN_HISAT2 ( reads, index, @@ -38,29 +35,9 @@ workflow ALIGN_HISAT2 { ch_genome_bam_index = bam_csi_index ? 
FASTQ_ALIGN_HISAT2.out.csi : FASTQ_ALIGN_HISAT2.out.bai ch_versions = ch_versions.mix(FASTQ_ALIGN_HISAT2.out.versions) - // Initialize UMI output channels as empty - will be populated if with_umi is true - ch_umi_genomic_dedup_log = channel.empty() - ch_umi_transcriptomic_dedup_log = channel.empty() - ch_umi_prepare_for_rsem_log = channel.empty() - ch_umi_transcriptome_dedup_bam = channel.empty() - ch_umi_transcriptome_sorted_bam = channel.empty() - ch_umi_transcriptome_sorted_bam_bai = channel.empty() - ch_umi_transcriptome_filtered_bam = channel.empty() - ch_umi_dedup_stats = channel.empty() - ch_umi_dedup_bam = channel.empty() - ch_umi_dedup_bai = channel.empty() - ch_umi_dedup_flagstat = channel.empty() - ch_umi_dedup_idxstats = channel.empty() - ch_umi_dedup_tsv_edit_distance = channel.empty() - ch_umi_dedup_tsv_per_umi = channel.empty() - ch_umi_dedup_tsv_umi_per_position = channel.empty() - - // Initialize multiqc_files with HISAT2 summary (always added) + ch_umi_result = channel.empty() ch_multiqc_files = FASTQ_ALIGN_HISAT2.out.summary.collect{ tuple -> tuple[1] } - // - // SUBWORKFLOW: Remove duplicate reads from BAM file based on UMIs - // if (with_umi) { def ch_bam_for_dedup = input_genome_bam.mix(ch_genome_bam) def ch_bai_for_dedup = input_genome_bam_index.mix(ch_genome_bam_index) @@ -75,23 +52,9 @@ workflow ALIGN_HISAT2 { transcript_fasta ) - ch_genome_bam = BAM_DEDUP_UMI.out.bam - ch_genome_bam_index = BAM_DEDUP_UMI.out.bai - ch_umi_genomic_dedup_log = BAM_DEDUP_UMI.out.genomic_dedup_log - ch_umi_transcriptomic_dedup_log = BAM_DEDUP_UMI.out.transcriptomic_dedup_log - ch_umi_prepare_for_rsem_log = BAM_DEDUP_UMI.out.prepare_for_rsem_log - ch_umi_transcriptome_dedup_bam = BAM_DEDUP_UMI.out.transcriptome_dedup_bam - ch_umi_transcriptome_sorted_bam = BAM_DEDUP_UMI.out.transcriptome_sorted_bam - ch_umi_transcriptome_sorted_bam_bai = BAM_DEDUP_UMI.out.transcriptome_sorted_bam_bai - ch_umi_transcriptome_filtered_bam = 
BAM_DEDUP_UMI.out.transcriptome_filtered_bam - ch_umi_dedup_stats = BAM_DEDUP_UMI.out.stats - ch_umi_dedup_bam = BAM_DEDUP_UMI.out.bam - ch_umi_dedup_bai = BAM_DEDUP_UMI.out.bai - ch_umi_dedup_flagstat = BAM_DEDUP_UMI.out.flagstat - ch_umi_dedup_idxstats = BAM_DEDUP_UMI.out.idxstats - ch_umi_dedup_tsv_edit_distance = BAM_DEDUP_UMI.out.tsv_edit_distance - ch_umi_dedup_tsv_per_umi = BAM_DEDUP_UMI.out.tsv_per_umi - ch_umi_dedup_tsv_umi_per_position = BAM_DEDUP_UMI.out.tsv_umi_per_position + ch_umi_result = BAM_DEDUP_UMI.out.result + ch_genome_bam = ch_umi_result.map { r -> [r.meta, r.bam] } + ch_genome_bam_index = ch_umi_result.map { r -> [r.meta, r.bai] } ch_versions = ch_versions.mix(BAM_DEDUP_UMI.out.versions) ch_multiqc_files = ch_multiqc_files.mix(BAM_DEDUP_UMI.out.multiqc_files) @@ -100,10 +63,6 @@ workflow ALIGN_HISAT2 { ch_genome_bam_index = input_genome_bam_index.mix(ch_genome_bam_index) if (skip_markduplicates) { - // The deduplicated stats should take priority for MultiQC, but use - // them straight out of the aligner otherwise. If mark duplicates - // will run, those stats will be added later instead to avoid - // duplicate flagstat files in MultiQC. 
ch_multiqc_files = ch_multiqc_files .mix(FASTQ_ALIGN_HISAT2.out.stats.collect{ tuple -> tuple[1] }) .mix(FASTQ_ALIGN_HISAT2.out.flagstat.collect{ tuple -> tuple[1] }) @@ -112,32 +71,15 @@ workflow ALIGN_HISAT2 { } emit: - bam = ch_genome_bam // channel: [ val(meta), path(bam) ] - bai = ch_genome_bam_index // channel: [ val(meta), path(bai) ] - orig_bam = FASTQ_ALIGN_HISAT2.out.bam // channel: [ val(meta), path(bam) ] - original aligned BAM before dedup - unaligned = FASTQ_ALIGN_HISAT2.out.fastq // channel: [ val(meta), path(fastq) ] - summary = FASTQ_ALIGN_HISAT2.out.summary // channel: [ val(meta), path(summary) ] - stats = FASTQ_ALIGN_HISAT2.out.stats // channel: [ val(meta), path(stats) ] - flagstat = FASTQ_ALIGN_HISAT2.out.flagstat // channel: [ val(meta), path(flagstat) ] - idxstats = FASTQ_ALIGN_HISAT2.out.idxstats // channel: [ val(meta), path(idxstats) ] - - // UMI dedup outputs - umi_genomic_dedup_log = ch_umi_genomic_dedup_log - umi_transcriptomic_dedup_log = ch_umi_transcriptomic_dedup_log - umi_prepare_for_rsem_log = ch_umi_prepare_for_rsem_log - umi_transcriptome_dedup_bam = ch_umi_transcriptome_dedup_bam - umi_transcriptome_sorted_bam = ch_umi_transcriptome_sorted_bam - umi_transcriptome_sorted_bam_bai = ch_umi_transcriptome_sorted_bam_bai - umi_transcriptome_filtered_bam = ch_umi_transcriptome_filtered_bam - umi_dedup_stats = ch_umi_dedup_stats - umi_dedup_bam = ch_umi_dedup_bam - umi_dedup_bai = ch_umi_dedup_bai - umi_dedup_flagstat = ch_umi_dedup_flagstat - umi_dedup_idxstats = ch_umi_dedup_idxstats - umi_dedup_tsv_edit_distance = ch_umi_dedup_tsv_edit_distance - umi_dedup_tsv_per_umi = ch_umi_dedup_tsv_per_umi - umi_dedup_tsv_umi_per_position = ch_umi_dedup_tsv_umi_per_position - - multiqc_files = ch_multiqc_files // channel: [ path(files) ] - versions = ch_versions // channel: [ versions.yml ] + bam = ch_genome_bam + bai = ch_genome_bam_index + orig_bam = FASTQ_ALIGN_HISAT2.out.bam + unaligned = FASTQ_ALIGN_HISAT2.out.fastq + summary = 
FASTQ_ALIGN_HISAT2.out.summary + stats = FASTQ_ALIGN_HISAT2.out.stats + flagstat = FASTQ_ALIGN_HISAT2.out.flagstat + idxstats = FASTQ_ALIGN_HISAT2.out.idxstats + umi = ch_umi_result // channel: UmiDedupResult records or empty + multiqc_files = ch_multiqc_files + versions = ch_versions } diff --git a/subworkflows/local/align_star/main.nf b/subworkflows/local/align_star/main.nf index a24b2059c..9b6055004 100644 --- a/subworkflows/local/align_star/main.nf +++ b/subworkflows/local/align_star/main.nf @@ -8,9 +8,6 @@ include { BAM_SORT_STATS_SAMTOOLS } from '../../nf-core/bam_so include { BAM_DEDUP_UMI } from '../../nf-core/bam_dedup_umi' -// -// Function that parses and returns the alignment rate from the STAR log output -// def getStarPercentMapped(_params, align_log) { def percent_aligned = 0 def pattern = /Uniquely mapped reads %\s*\|\s*([\d\.]+)%/ @@ -29,59 +26,47 @@ workflow ALIGN_STAR { reads // channel: [ val(meta), [ reads ] ] index // channel: [ val(meta), [ index ] ] gtf // channel: [ val(meta), [ gtf ] ] - star_ignore_sjdbgtf // boolean: when using pre-built STAR indices do not re-extract and use splice junctions from the GTF file - seq_platform // string : sequencing platform - seq_center // string : sequencing center - is_aws_igenome // boolean: whether the genome files are from AWS iGenomes + star_ignore_sjdbgtf // boolean + seq_platform // string + seq_center // string + is_aws_igenome // boolean fasta // channel: /path/to/fasta - use_sentieon_star // boolean: whether star alignment is accelerated with Sentieon - with_umi // boolean: whether UMI processing is enabled - umi_dedup_tool // string: 'umicollapse' or 'umitools' - umitools_dedup_stats // boolean: whether to generate UMI-tools dedup stats - bam_csi_index // boolean: whether to generate CSI index - skip_markduplicates // boolean: skip marking duplicates - transcript_fasta // channel: [ val(meta), [ fasta ] ] - for UMI dedup - input_genome_bam // channel: [ val(meta), path(bam) ] - pre-existing 
genome BAMs to mix (when not using UMI) - input_genome_bam_index // channel: [ val(meta), path(bai) ] - pre-existing genome BAM indices - input_transcriptome_bam // channel: [ val(meta), path(bam) ] - pre-existing transcriptome BAMs to mix + use_sentieon_star // boolean + with_umi // boolean + umi_dedup_tool // string + umitools_dedup_stats // boolean + bam_csi_index // boolean + skip_markduplicates // boolean + transcript_fasta // channel: [ val(meta), [ fasta ] ] + input_genome_bam // channel: [ val(meta), path(bam) ] + input_genome_bam_index // channel: [ val(meta), path(bai) ] + input_transcriptome_bam // channel: [ val(meta), path(bam) ] main: ch_versions = channel.empty() // - // Map reads with STAR + // Map reads with STAR (all variants emit StarAlignResult record) // ch_star_out = null if (use_sentieon_star) { - SENTIEON_STAR_ALIGN(reads, index, gtf, star_ignore_sjdbgtf, seq_platform, seq_center) ch_star_out = SENTIEON_STAR_ALIGN - // SENTIEON_STAR_ALIGN uses topic-based version reporting - } else if (is_aws_igenome) { - STAR_ALIGN_IGENOMES(reads, index, gtf, star_ignore_sjdbgtf, seq_platform, seq_center) ch_star_out = STAR_ALIGN_IGENOMES - ch_versions = ch_versions.mix(STAR_ALIGN_IGENOMES.out.versions.first()) - } else { - STAR_ALIGN(reads, index, gtf, star_ignore_sjdbgtf, seq_platform, seq_center) ch_star_out = STAR_ALIGN - ch_versions = ch_versions.mix(STAR_ALIGN.out.versions.first()) - } - ch_orig_bam = ch_star_out.out.bam - ch_log_final = ch_star_out.out.log_final - ch_log_out = ch_star_out.out.log_out - ch_log_progress = ch_star_out.out.log_progress - ch_bam_sorted = ch_star_out.out.bam_sorted - ch_bam_transcript = ch_star_out.out.bam_transcript - ch_fastq = ch_star_out.out.fastq - ch_tab = ch_star_out.out.tab - ch_percent_mapped = ch_log_final.map { meta, log -> [ meta, getStarPercentMapped(params, log) ] } + // Access record fields: .out is channel of StarAlignResult records + ch_star_result = ch_star_out.out + ch_orig_bam = ch_star_result.map { r 
-> [r.meta, r.bam] } + ch_percent_mapped = ch_star_result.map { r -> + [r.meta, getStarPercentMapped(params, r.log_final)] + } // // Sort, index BAM file and run samtools stats, flagstat and idxstats @@ -89,33 +74,17 @@ workflow ALIGN_STAR { BAM_SORT_STATS_SAMTOOLS(ch_orig_bam, fasta) ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) - ch_genome_bam = BAM_SORT_STATS_SAMTOOLS.out.bam - ch_genome_bam_index = bam_csi_index ? BAM_SORT_STATS_SAMTOOLS.out.csi : BAM_SORT_STATS_SAMTOOLS.out.bai - ch_transcriptome_bam = ch_bam_transcript - - // Initialize UMI output channels as empty - will be populated if with_umi is true - ch_umi_genomic_dedup_log = channel.empty() - ch_umi_transcriptomic_dedup_log = channel.empty() - ch_umi_prepare_for_rsem_log = channel.empty() - ch_umi_transcriptome_dedup_bam = channel.empty() - ch_umi_transcriptome_sorted_bam = channel.empty() - ch_umi_transcriptome_sorted_bam_bai = channel.empty() - ch_umi_transcriptome_filtered_bam = channel.empty() - ch_umi_dedup_stats = channel.empty() - ch_umi_dedup_bam = channel.empty() - ch_umi_dedup_bai = channel.empty() - ch_umi_dedup_flagstat = channel.empty() - ch_umi_dedup_idxstats = channel.empty() - ch_umi_dedup_tsv_edit_distance = channel.empty() - ch_umi_dedup_tsv_per_umi = channel.empty() - ch_umi_dedup_tsv_umi_per_position = channel.empty() - - // Initialize multiqc_files with STAR log (always added) - ch_multiqc_files = ch_log_final.collect{ tuple -> tuple[1] } + // SamtoolsResult record contains bam, bai, csi, stats, flagstat, idxstats + ch_samtools = BAM_SORT_STATS_SAMTOOLS.out.result + + ch_genome_bam = ch_samtools.map { r -> [r.meta, r.bam] } + ch_genome_bam_index = ch_samtools.map { r -> [r.meta, bam_csi_index ? 
r.csi : r.bai] } + ch_transcriptome_bam = ch_star_result.map { r -> [r.meta, r.bam_transcript] } + + // UMI dedup record (empty channel when UMI not enabled) + ch_umi_result = channel.empty() + ch_multiqc_files = ch_star_result.map { r -> r.log_final } - // - // SUBWORKFLOW: Remove duplicate reads from BAM file based on UMIs - // if (with_umi) { def ch_bam_for_dedup = input_genome_bam.mix(ch_genome_bam) def ch_bai_for_dedup = input_genome_bam_index.mix(ch_genome_bam_index) @@ -131,24 +100,11 @@ workflow ALIGN_STAR { transcript_fasta ) - ch_genome_bam = BAM_DEDUP_UMI.out.bam - ch_genome_bam_index = BAM_DEDUP_UMI.out.bai - ch_transcriptome_bam = BAM_DEDUP_UMI.out.transcriptome_bam - ch_umi_genomic_dedup_log = BAM_DEDUP_UMI.out.genomic_dedup_log - ch_umi_transcriptomic_dedup_log = BAM_DEDUP_UMI.out.transcriptomic_dedup_log - ch_umi_prepare_for_rsem_log = BAM_DEDUP_UMI.out.prepare_for_rsem_log - ch_umi_transcriptome_dedup_bam = BAM_DEDUP_UMI.out.transcriptome_dedup_bam - ch_umi_transcriptome_sorted_bam = BAM_DEDUP_UMI.out.transcriptome_sorted_bam - ch_umi_transcriptome_sorted_bam_bai = BAM_DEDUP_UMI.out.transcriptome_sorted_bam_bai - ch_umi_transcriptome_filtered_bam = BAM_DEDUP_UMI.out.transcriptome_filtered_bam - ch_umi_dedup_stats = BAM_DEDUP_UMI.out.stats - ch_umi_dedup_bam = BAM_DEDUP_UMI.out.bam - ch_umi_dedup_bai = BAM_DEDUP_UMI.out.bai - ch_umi_dedup_flagstat = BAM_DEDUP_UMI.out.flagstat - ch_umi_dedup_idxstats = BAM_DEDUP_UMI.out.idxstats - ch_umi_dedup_tsv_edit_distance = BAM_DEDUP_UMI.out.tsv_edit_distance - ch_umi_dedup_tsv_per_umi = BAM_DEDUP_UMI.out.tsv_per_umi - ch_umi_dedup_tsv_umi_per_position = BAM_DEDUP_UMI.out.tsv_umi_per_position + // UmiDedupResult record replaces 15 individual channel captures + ch_umi_result = BAM_DEDUP_UMI.out.result + ch_genome_bam = ch_umi_result.map { r -> [r.meta, r.bam] } + ch_genome_bam_index = ch_umi_result.map { r -> [r.meta, r.bai] } + ch_transcriptome_bam = ch_umi_result.map { r -> [r.meta, r.transcriptome_bam] } 
ch_versions = ch_versions.mix(BAM_DEDUP_UMI.out.versions) ch_multiqc_files = ch_multiqc_files.mix(BAM_DEDUP_UMI.out.multiqc_files) @@ -158,51 +114,22 @@ workflow ALIGN_STAR { ch_transcriptome_bam = input_transcriptome_bam.mix(ch_transcriptome_bam) if (skip_markduplicates) { - // The deduplicated stats should take priority for MultiQC, but use - // them straight out of the aligner otherwise. If mark duplicates - // will run, those stats will be added later instead to avoid - // duplicate flagstat files in MultiQC. ch_multiqc_files = ch_multiqc_files - .mix(BAM_SORT_STATS_SAMTOOLS.out.stats.collect{ tuple -> tuple[1] }) - .mix(BAM_SORT_STATS_SAMTOOLS.out.flagstat.collect{ tuple -> tuple[1] }) - .mix(BAM_SORT_STATS_SAMTOOLS.out.idxstats.collect{ tuple -> tuple[1] }) + .mix(ch_samtools.map { r -> r.stats }) + .mix(ch_samtools.map { r -> r.flagstat }) + .mix(ch_samtools.map { r -> r.idxstats }) } } + // 35 emits -> 8 emit: - bam = ch_genome_bam // channel: [ val(meta), path(bam) ] - bai = ch_genome_bam_index // channel: [ val(meta), path(bai) ] - csi = BAM_SORT_STATS_SAMTOOLS.out.csi // channel: [ val(meta), path(csi) ] - orig_bam = ch_orig_bam // channel: [ val(meta), path(bam) ] - original aligned BAM before sort/dedup - bam_transcript = ch_transcriptome_bam // channel: [ val(meta), path(bam) ] - transcriptome BAM (deduplicated if UMI) - log_final = ch_log_final // channel: [ val(meta), log_final ] - log_out = ch_log_out // channel: [ val(meta), log_out ] - log_progress = ch_log_progress // channel: [ val(meta), log_progress ] - bam_sorted = ch_bam_sorted // channel: [ val(meta), bam_sorted ] - fastq = ch_fastq // channel: [ val(meta), fastq ] - tab = ch_tab // channel: [ val(meta), tab ] - stats = BAM_SORT_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - percent_mapped = 
ch_percent_mapped // channel: [ val(meta), percent_mapped ] - - // UMI dedup outputs - umi_genomic_dedup_log = ch_umi_genomic_dedup_log - umi_transcriptomic_dedup_log = ch_umi_transcriptomic_dedup_log - umi_prepare_for_rsem_log = ch_umi_prepare_for_rsem_log - umi_transcriptome_dedup_bam = ch_umi_transcriptome_dedup_bam - umi_transcriptome_sorted_bam = ch_umi_transcriptome_sorted_bam - umi_transcriptome_sorted_bam_bai = ch_umi_transcriptome_sorted_bam_bai - umi_transcriptome_filtered_bam = ch_umi_transcriptome_filtered_bam - umi_dedup_stats = ch_umi_dedup_stats - umi_dedup_bam = ch_umi_dedup_bam - umi_dedup_bai = ch_umi_dedup_bai - umi_dedup_flagstat = ch_umi_dedup_flagstat - umi_dedup_idxstats = ch_umi_dedup_idxstats - umi_dedup_tsv_edit_distance = ch_umi_dedup_tsv_edit_distance - umi_dedup_tsv_per_umi = ch_umi_dedup_tsv_per_umi - umi_dedup_tsv_umi_per_position = ch_umi_dedup_tsv_umi_per_position - - multiqc_files = ch_multiqc_files // channel: [ path(files) ] - versions = ch_versions // channel: [ versions.yml ] + bam = ch_genome_bam + bai = ch_genome_bam_index + bam_transcript = ch_transcriptome_bam + star = ch_star_result // channel: StarAlignResult records + samtools = ch_samtools // channel: SamtoolsResult records + umi = ch_umi_result // channel: UmiDedupResult records or empty + percent_mapped = ch_percent_mapped + multiqc_files = ch_multiqc_files + versions = ch_versions } diff --git a/subworkflows/local/quantify_rsem/main.nf b/subworkflows/local/quantify_rsem/main.nf index 1b4463974..cff62dc5c 100644 --- a/subworkflows/local/quantify_rsem/main.nf +++ b/subworkflows/local/quantify_rsem/main.nf @@ -1,3 +1,5 @@ +nextflow.preview.types = true + // // Gene/transcript quantification with RSEM // @@ -6,6 +8,20 @@ include { RSEM_CALCULATEEXPRESSION } from '../../../modules/nf-core/rsem/calcula include { RSEM_MERGE_COUNTS } from '../../../modules/local/rsem_merge_counts' include { SENTIEON_RSEMCALCULATEEXPRESSION } from 
'../../../modules/nf-core/sentieon/rsemcalculateexpression' +record RsemResult { + meta: Map + stat: Path? + logs: Path? + counts_gene: Path? + counts_transcript: Path? + merged_counts_gene: Path? + merged_counts_transcript: Path? + merged_tpm_gene: Path? + merged_tpm_transcript: Path? + merged_genes_long: Path? + merged_isoforms_long: Path? +} + workflow QUANTIFY_RSEM { take: reads // channel: [ val(meta), [ reads ] ] - FASTQ or BAM files @@ -23,39 +39,44 @@ workflow QUANTIFY_RSEM { if (use_sentieon_star){ SENTIEON_RSEMCALCULATEEXPRESSION ( reads, index ) ch_rsem_out = SENTIEON_RSEMCALCULATEEXPRESSION - // SENTIEON_RSEMCALCULATEEXPRESSION uses topic-based version reporting } else { RSEM_CALCULATEEXPRESSION ( reads, index ) ch_rsem_out = RSEM_CALCULATEEXPRESSION - ch_versions = ch_versions.mix(RSEM_CALCULATEEXPRESSION.out.versions.first()) } - ch_counts_gene = ch_rsem_out.out.counts_gene - ch_counts_transcript = ch_rsem_out.out.counts_transcript - ch_stat = ch_rsem_out.out.stat - ch_logs = ch_rsem_out.out.logs + // Extract individual fields from the process record for downstream use + ch_rsem_result = ch_rsem_out.out + ch_counts_gene = ch_rsem_result.map { r -> [r.meta, r.counts_gene] } + ch_counts_transcript = ch_rsem_result.map { r -> [r.meta, r.counts_transcript] } + ch_stat = ch_rsem_result.map { r -> [r.meta, r.stat] } + ch_logs = ch_rsem_result.map { r -> [r.meta, r.logs] } // // Merge counts across samples // RSEM_MERGE_COUNTS ( - ch_counts_gene.collect{ tuple -> tuple[1] }, // [meta, counts]: Collect the second element (counts files) in the channel across all samples + ch_counts_gene.collect{ tuple -> tuple[1] }, ch_counts_transcript.collect{ tuple -> tuple[1] } ) - ch_versions = ch_versions.mix(RSEM_MERGE_COUNTS.out.versions) + + // Combine per-sample process records with pipeline-wide aggregate outputs. + // The RsemMergedResult record lets us combine() once instead of 6 separate times. 
+ ch_merged = RSEM_MERGE_COUNTS.out emit: - counts_gene = ch_counts_gene // channel: [ val(meta), counts ] - counts_transcript = ch_counts_transcript // channel: [ val(meta), counts ] - stat = ch_stat // channel: [ val(meta), stat ] - logs = ch_logs // channel: [ val(meta), logs ] - - merged_counts_gene = RSEM_MERGE_COUNTS.out.counts_gene // path: *.gene_counts.tsv - merged_tpm_gene = RSEM_MERGE_COUNTS.out.tpm_gene // path: *.gene_tpm.tsv - merged_counts_transcript = RSEM_MERGE_COUNTS.out.counts_transcript // path: *.transcript_counts.tsv - merged_tpm_transcript = RSEM_MERGE_COUNTS.out.tpm_transcript // path: *.transcript_tpm.tsv - merged_genes_long = RSEM_MERGE_COUNTS.out.genes_long // path: *.genes_long.tsv - merged_isoforms_long = RSEM_MERGE_COUNTS.out.isoforms_long // path: *.isoforms_long.tsv - - versions = ch_versions // channel: [ versions.yml ] + result = ch_rsem_result + .combine(ch_merged) + .map { calc, merged -> + record( + meta: calc.meta, + stat: calc.stat, logs: calc.logs, + counts_gene: calc.counts_gene, counts_transcript: calc.counts_transcript, + merged_counts_gene: merged.counts_gene, merged_counts_transcript: merged.counts_transcript, + merged_tpm_gene: merged.tpm_gene, merged_tpm_transcript: merged.tpm_transcript, + merged_genes_long: merged.genes_long, merged_isoforms_long: merged.isoforms_long + ) + } + stat = ch_stat + merged_counts_gene = ch_merged.map { r -> r.counts_gene } + versions = ch_versions } diff --git a/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/main.nf b/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/main.nf index df6f8b5c5..0b84d6329 100644 --- a/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/main.nf +++ b/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/main.nf @@ -24,11 +24,14 @@ workflow BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE { // Index BAM file and run samtools stats, flagstat and idxstats // SAMTOOLS_INDEX ( UMICOLLAPSE.out.bam ) - ch_versions = 
ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + + // Extract index fields from SamtoolsIndexResult record + ch_index_bai = SAMTOOLS_INDEX.out.map { r -> [r.meta, r.bai] } + ch_index_csi = SAMTOOLS_INDEX.out.map { r -> [r.meta, r.csi] } ch_bam_bai_dedup = UMICOLLAPSE.out.bam - .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true) - .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true) + .join(ch_index_bai, by: [0], remainder: true) + .join(ch_index_csi, by: [0], remainder: true) .map { meta, bam, bai, csi -> if (bai) { @@ -44,8 +47,8 @@ workflow BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE { emit: bam = UMICOLLAPSE.out.bam // channel: [ val(meta), path(bam) ] - bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), path(bai) ] - csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), path(csi) ] + bai = ch_index_bai // channel: [ val(meta), path(bai) ] + csi = ch_index_csi // channel: [ val(meta), path(csi) ] dedup_stats = UMICOLLAPSE.out.log // channel: [ val(meta), path(stats) ] stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), path(stats) ] flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), path(flagstat) ] diff --git a/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/main.nf b/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/main.nf index fc3fbb040..1fb9ab8d6 100644 --- a/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/main.nf +++ b/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/main.nf @@ -25,11 +25,14 @@ workflow BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS { // Index BAM file and run samtools stats, flagstat and idxstats // SAMTOOLS_INDEX ( UMITOOLS_DEDUP.out.bam ) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + + // Extract index fields from SamtoolsIndexResult record + ch_index_bai = SAMTOOLS_INDEX.out.map { r -> [r.meta, r.bai] } + ch_index_csi = SAMTOOLS_INDEX.out.map { r -> [r.meta, r.csi] } ch_bam_bai_dedup = UMITOOLS_DEDUP.out.bam - .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: 
true) - .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true) + .join(ch_index_bai, by: [0], remainder: true) + .join(ch_index_csi, by: [0], remainder: true) .map { meta, bam, bai, csi -> if (bai) { @@ -49,8 +52,8 @@ workflow BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS { tsv_per_umi = UMITOOLS_DEDUP.out.tsv_per_umi // channel: [ val(meta), path(tsv) ] tsv_umi_per_position = UMITOOLS_DEDUP.out.tsv_umi_per_position // channel: [ val(meta), path(tsv) ] - bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), path(bai) ] - csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), path(csi) ] + bai = ch_index_bai // channel: [ val(meta), path(bai) ] + csi = ch_index_csi // channel: [ val(meta), path(csi) ] stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), path(stats) ] flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), path(flagstat) ] idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), path(idxstats) ] diff --git a/subworkflows/nf-core/bam_dedup_umi/main.nf b/subworkflows/nf-core/bam_dedup_umi/main.nf index 775cd1856..5469c080b 100644 --- a/subworkflows/nf-core/bam_dedup_umi/main.nf +++ b/subworkflows/nf-core/bam_dedup_umi/main.nf @@ -1,3 +1,5 @@ +nextflow.preview.types = true + // // BAM deduplication with UMI processing // @@ -11,6 +13,29 @@ include { BAM_SORT_STATS_SAMTOOLS include { UMITOOLS_PREPAREFORRSEM } from '../../../modules/nf-core/umitools/prepareforrsem' include { SAMTOOLS_SORT } from '../../../modules/nf-core/samtools/sort/main' +record UmiDedupResult { + meta: Map + bam: Path + bai: Path + genomic_dedup_log: Path? + transcriptomic_dedup_log: Path? + prepare_for_rsem_log: Path? + transcriptome_bam: Path? + transcriptome_dedup_bam: Path? + transcriptome_sorted_bam: Path? + transcriptome_sorted_bam_bai: Path? + transcriptome_filtered_bam: Path? + genome_stats: Path? + genome_flagstat: Path? + genome_idxstats: Path? + transcriptome_stats: Path? + transcriptome_flagstat: Path? + transcriptome_idxstats: Path? 
+ tsv_edit_distance: Path? + tsv_per_umi: Path? + tsv_umi_per_position: Path? +} + workflow BAM_DEDUP_UMI { take: ch_genome_bam // channel: [ val(meta), path(bam), path(bai) ] @@ -53,21 +78,16 @@ workflow BAM_DEDUP_UMI { ch_tsv_umi_per_position = UMI_DEDUP_GENOME.out.tsv_umi_per_position } - // Co-ordinate sort, index and run stats on transcriptome BAM. This takes - // some preparation- we have to coordinate sort the BAM, run the - // deduplication, then restore name sorting and run a script from umitools - // to prepare for rsem or salmon - - // 1. Coordinate sort - BAM_SORT_STATS_SAMTOOLS ( ch_transcriptome_bam, ch_transcript_fasta ) - ch_sorted_transcriptome_bam = BAM_SORT_STATS_SAMTOOLS.out.bam - .join(BAM_SORT_STATS_SAMTOOLS.out.bai) - // 2. Transcriptome BAM deduplication + // Record field access: extract [meta, bam, bai] from SamtoolsResult record + ch_sorted_transcriptome_bam = BAM_SORT_STATS_SAMTOOLS.out.result + .map { r -> [r.meta, r.bam, r.bai] } + + // Transcriptome BAM deduplication if (umi_dedup_tool == "umicollapse") { BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_TRANSCRIPTOME ( ch_sorted_transcriptome_bam @@ -87,17 +107,18 @@ workflow BAM_DEDUP_UMI { ch_tsv_umi_per_position = ch_tsv_umi_per_position.mix(UMI_DEDUP_TRANSCRIPTOME.out.tsv_umi_per_position) } - // 3. Restore name sorting + // Restore name sorting SAMTOOLS_SORT ( UMI_DEDUP_TRANSCRIPTOME.out.bam, ch_fasta, '' ) - // 4. 
Run prepare_for_rsem.py on paired-end BAM files - // This fixes paired-end reads in name sorted BAM files - // See: https://github.com/nf-core/rnaseq/issues/828 - ended_transcriptome_dedup_bam = SAMTOOLS_SORT.out.bam + // Extract bam from SamtoolsSortResult record + ch_namesorted_bam = SAMTOOLS_SORT.out.map { r -> [r.meta, r.bam] } + + // Fix paired-end reads in name sorted BAM files + ended_transcriptome_dedup_bam = ch_namesorted_bam .branch { meta, bam -> single_end: meta.single_end @@ -111,12 +132,12 @@ workflow BAM_DEDUP_UMI { .map { meta, bam -> [ meta, bam, [] ] } ) - ch_dedup_transcriptome_bam = ended_transcriptome_dedup_bam.single_end - .mix(UMITOOLS_PREPAREFORRSEM.out.bam) + // Extract fields from PrepareForRsemResult record + ch_rsem_bam = UMITOOLS_PREPAREFORRSEM.out.map { r -> [r.meta, r.bam] } + ch_rsem_log = UMITOOLS_PREPAREFORRSEM.out.map { r -> [r.meta, r.log] } - // Collect files useful for MultiQC into one helpful emission. Don't - // automatically add transcriptome stats- difficult to separate in multiqc - // without a bit more work + ch_dedup_transcriptome_bam = ended_transcriptome_dedup_bam.single_end + .mix(ch_rsem_bam) ch_multiqc_files = ch_genomic_dedup_log .mix(UMI_DEDUP_GENOME.out.stats) @@ -125,29 +146,50 @@ workflow BAM_DEDUP_UMI { .transpose() .map{ item -> item[1] } - // Record versions - ch_versions = UMI_DEDUP_GENOME.out.versions .mix(BAM_SORT_STATS_SAMTOOLS.out.versions) - .mix(UMITOOLS_PREPAREFORRSEM.out.versions) + + // Join all per-sample outputs by meta to construct the record. + // Genome and transcriptome stats are kept as separate fields + // (rather than mixed) to enable clean join-by-meta construction. + ch_bai = bam_csi_index ? UMI_DEDUP_GENOME.out.csi : UMI_DEDUP_GENOME.out.bai emit: - bam = UMI_DEDUP_GENOME.out.bam // channel: [ val(meta), path(bam) ] - bai = bam_csi_index ? 
UMI_DEDUP_GENOME.out.csi : UMI_DEDUP_GENOME.out.bai // channel: [ val(meta), path(bai) ] - genomic_dedup_log = ch_genomic_dedup_log // channel: [ val(meta), path(log) ] - transcriptomic_dedup_log = ch_transcriptomic_dedup_log // channel: [ val(meta), path(log) ] - prepare_for_rsem_log = UMITOOLS_PREPAREFORRSEM.out.log // channel: [ val(meta), path(log) ] - stats = UMI_DEDUP_GENOME.out.stats.mix(UMI_DEDUP_TRANSCRIPTOME.out.stats) // channel: [ val(meta), path(stats)] - flagstat = UMI_DEDUP_GENOME.out.flagstat.mix(UMI_DEDUP_TRANSCRIPTOME.out.flagstat) // channel: [ val(meta), path(flagstat)] - idxstats = UMI_DEDUP_GENOME.out.idxstats.mix(UMI_DEDUP_TRANSCRIPTOME.out.idxstats) // channel: [ val(meta), path(idxstats)] - tsv_edit_distance = ch_tsv_edit_distance // channel: [ val(meta), path(tsv) ] - tsv_per_umi = ch_tsv_per_umi // channel: [ val(meta), path(tsv) ] - tsv_umi_per_position = ch_tsv_umi_per_position // channel: [ val(meta), path(tsv) ] - multiqc_files = ch_multiqc_files // channel: file - transcriptome_bam = ch_dedup_transcriptome_bam // channel: [ val(meta), path(bam) ] - final output - transcriptome_dedup_bam = UMI_DEDUP_TRANSCRIPTOME.out.bam // channel: [ val(meta), path(bam) ] - after dedup, before name sort - transcriptome_sorted_bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), path(bam) ] - name-sorted - transcriptome_sorted_bam_bai = UMI_DEDUP_TRANSCRIPTOME.out.bai // channel: [ val(meta), path(bai) ] - coordinate-sorted dedup index - transcriptome_filtered_bam = UMITOOLS_PREPAREFORRSEM.out.bam // channel: [ val(meta), path(bam) ] - paired-end filtered - versions = ch_versions // channel: [ path(versions.yml) ] + result = UMI_DEDUP_GENOME.out.bam + .join(ch_bai, by: [0]) + .join(ch_genomic_dedup_log, by: [0], remainder: true) + .join(ch_transcriptomic_dedup_log, by: [0], remainder: true) + .join(ch_rsem_log, by: [0], remainder: true) + .join(ch_dedup_transcriptome_bam, by: [0], remainder: true) + .join(UMI_DEDUP_TRANSCRIPTOME.out.bam, by: [0], 
remainder: true) + .join(ch_namesorted_bam, by: [0], remainder: true) + .join(UMI_DEDUP_TRANSCRIPTOME.out.bai, by: [0], remainder: true) + .join(ch_rsem_bam, by: [0], remainder: true) + .join(UMI_DEDUP_GENOME.out.stats, by: [0], remainder: true) + .join(UMI_DEDUP_GENOME.out.flagstat, by: [0], remainder: true) + .join(UMI_DEDUP_GENOME.out.idxstats, by: [0], remainder: true) + .join(UMI_DEDUP_TRANSCRIPTOME.out.stats, by: [0], remainder: true) + .join(UMI_DEDUP_TRANSCRIPTOME.out.flagstat, by: [0], remainder: true) + .join(UMI_DEDUP_TRANSCRIPTOME.out.idxstats, by: [0], remainder: true) + .join(ch_tsv_edit_distance, by: [0], remainder: true) + .join(ch_tsv_per_umi, by: [0], remainder: true) + .join(ch_tsv_umi_per_position, by: [0], remainder: true) + .map { meta, bam, bai, g_log, t_log, rsem_log, t_bam, t_dedup_bam, t_sorted_bam, t_sorted_bai, t_filtered_bam, + g_stats, g_flagstat, g_idxstats, t_stats, t_flagstat, t_idxstats, + tsv_ed, tsv_pu, tsv_upp -> + record( + meta: meta, + bam: bam, bai: bai, + genomic_dedup_log: g_log, transcriptomic_dedup_log: t_log, + prepare_for_rsem_log: rsem_log, + transcriptome_bam: t_bam, transcriptome_dedup_bam: t_dedup_bam, + transcriptome_sorted_bam: t_sorted_bam, transcriptome_sorted_bam_bai: t_sorted_bai, + transcriptome_filtered_bam: t_filtered_bam, + genome_stats: g_stats, genome_flagstat: g_flagstat, genome_idxstats: g_idxstats, + transcriptome_stats: t_stats, transcriptome_flagstat: t_flagstat, transcriptome_idxstats: t_idxstats, + tsv_edit_distance: tsv_ed, tsv_per_umi: tsv_pu, tsv_umi_per_position: tsv_upp + ) + } + multiqc_files = ch_multiqc_files + versions = ch_versions } diff --git a/subworkflows/nf-core/bam_markduplicates_picard/main.nf b/subworkflows/nf-core/bam_markduplicates_picard/main.nf index f16cada55..bc094a692 100644 --- a/subworkflows/nf-core/bam_markduplicates_picard/main.nf +++ b/subworkflows/nf-core/bam_markduplicates_picard/main.nf @@ -1,3 +1,5 @@ +nextflow.preview.types = true + // // Picard MarkDuplicates, 
index BAM file and run samtools stats, flagstat and idxstats // @@ -6,6 +8,16 @@ include { PICARD_MARKDUPLICATES } from '../../../modules/nf-core/picard/markdupl include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' include { BAM_STATS_SAMTOOLS } from '../bam_stats_samtools/main' +record MarkDupResult { + meta: Map + bam: Path + bai: Path + metrics: Path + stats: Path + flagstat: Path + idxstats: Path +} + workflow BAM_MARKDUPLICATES_PICARD { take: @@ -18,17 +30,23 @@ workflow BAM_MARKDUPLICATES_PICARD { ch_versions = channel.empty() PICARD_MARKDUPLICATES ( ch_reads, ch_fasta, ch_fai ) - ch_versions = ch_versions.mix(PICARD_MARKDUPLICATES.out.versions.first()) - ch_markdup = PICARD_MARKDUPLICATES.out.bam.mix(PICARD_MARKDUPLICATES.out.cram) + // Extract bam/cram from the record for downstream indexing + ch_picard_bam = PICARD_MARKDUPLICATES.out.map { r -> [r.meta, r.bam] }.filter { it[1] } + ch_picard_cram = PICARD_MARKDUPLICATES.out.map { r -> [r.meta, r.cram] }.filter { it[1] } + ch_markdup = ch_picard_bam.mix(ch_picard_cram) SAMTOOLS_INDEX ( ch_markdup ) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + + // Extract index fields from SamtoolsIndexResult record + ch_index_bai = SAMTOOLS_INDEX.out.map { r -> [r.meta, r.bai] } + ch_index_crai = SAMTOOLS_INDEX.out.map { r -> [r.meta, r.crai] } + ch_index_csi = SAMTOOLS_INDEX.out.map { r -> [r.meta, r.csi] } ch_reads_index = ch_markdup - .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true) - .join(SAMTOOLS_INDEX.out.crai, by: [0], remainder: true) - .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true) + .join(ch_index_bai, by: [0], remainder: true) + .join(ch_index_crai, by: [0], remainder: true) + .join(ch_index_csi, by: [0], remainder: true) .map{meta, reads, bai, crai, csi -> if (bai) [ meta, reads, bai ] else if (crai) [ meta, reads, crai ] @@ -38,17 +56,27 @@ workflow BAM_MARKDUPLICATES_PICARD { BAM_STATS_SAMTOOLS ( ch_reads_index, ch_fasta ) ch_versions = 
ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) + ch_picard_metrics = PICARD_MARKDUPLICATES.out.map { r -> [r.meta, r.metrics] } + emit: - bam = PICARD_MARKDUPLICATES.out.bam // channel: [ val(meta), path(bam) ] - cram = PICARD_MARKDUPLICATES.out.cram // channel: [ val(meta), path(cram) ] - metrics = PICARD_MARKDUPLICATES.out.metrics // channel: [ val(meta), path(metrics) ] - bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), path(bai) ] - crai = SAMTOOLS_INDEX.out.crai // channel: [ val(meta), path(crai) ] - csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), path(csi) ] - - stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), path(stats) ] - flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), path(flagstat) ] - idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), path(idxstats) ] - - versions = ch_versions // channel: [ versions.yml ] + result = ch_picard_bam + .join(ch_index_bai, by: [0], remainder: true) + .join(ch_picard_metrics, by: [0]) + .join(BAM_STATS_SAMTOOLS.out.stats, by: [0]) + .join(BAM_STATS_SAMTOOLS.out.flagstat, by: [0]) + .join(BAM_STATS_SAMTOOLS.out.idxstats, by: [0]) + .map { meta, bam, bai, metrics, stats, flagstat, idxstats -> + record( + meta: meta, + bam: bam, bai: bai, metrics: metrics, + stats: stats, flagstat: flagstat, idxstats: idxstats + ) + } + bam = ch_picard_bam + csi = ch_index_csi + metrics = ch_picard_metrics + stats = BAM_STATS_SAMTOOLS.out.stats + flagstat = BAM_STATS_SAMTOOLS.out.flagstat + idxstats = BAM_STATS_SAMTOOLS.out.idxstats + versions = ch_versions } diff --git a/subworkflows/nf-core/bam_rseqc/main.nf b/subworkflows/nf-core/bam_rseqc/main.nf index bfc93f900..5600a8b07 100644 --- a/subworkflows/nf-core/bam_rseqc/main.nf +++ b/subworkflows/nf-core/bam_rseqc/main.nf @@ -1,3 +1,5 @@ +nextflow.preview.types = true + // // Run RSeQC modules // @@ -11,6 +13,23 @@ include { RSEQC_READDISTRIBUTION } from '../../../modules/nf-core/rseqc/readdi include { RSEQC_READDUPLICATION } from 
'../../../modules/nf-core/rseqc/readduplication/main' include { RSEQC_TIN } from '../../../modules/nf-core/rseqc/tin/main' +include { InnerDistanceResult } from '../../../modules/nf-core/rseqc/innerdistance/main' +include { JunctionAnnotationResult } from '../../../modules/nf-core/rseqc/junctionannotation/main' +include { ReadDuplicationResult } from '../../../modules/nf-core/rseqc/readduplication/main' + +record RSeQCResult { + meta: Map + bamstat: Path? + inferexperiment: Path? + junction_annotation: JunctionAnnotationResult? + junctionsaturation_pdf: Path? + junctionsaturation_r: Path? + read_duplication: ReadDuplicationResult? + readdistribution: Path? + inner_distance: InnerDistanceResult? + tin: Path? +} + workflow BAM_RSEQC { take: bam_bai // channel: [ val(meta), [ bam, bai ] ] @@ -19,156 +38,95 @@ workflow BAM_RSEQC { main: - // bam = bam_bai.map{ [ it[0], it[1][0], it[1][1] ] } bam = bam_bai.map{ it -> [ it[0], it[1][0], it[1][1] ] } - // - // Run RSeQC bam_stat.py - // - bamstat_txt = channel.empty() + // Per-module channels - records for multi-output modules, paths for single-output + bamstat_txt = channel.empty() + inferexperiment_txt = channel.empty() + ch_junction_annotation = channel.empty() // [meta, JunctionAnnotationResult] + junctionsaturation_pdf = channel.empty() + junctionsaturation_r = channel.empty() + ch_read_duplication = channel.empty() // [meta, ReadDuplicationResult] + readdistribution_txt = channel.empty() + ch_inner_distance = channel.empty() // [meta, InnerDistanceResult] + tin_txt = channel.empty() if ('bam_stat' in rseqc_modules) { RSEQC_BAMSTAT(bam) - bamstat_txt = RSEQC_BAMSTAT.out.txt + bamstat_txt = RSEQC_BAMSTAT.out.map { r -> [r.meta, r.txt] } } - // - // Run RSeQC inner_distance.py - // - innerdistance_all = channel.empty() - innerdistance_distance = channel.empty() - innerdistance_freq = channel.empty() - innerdistance_mean = channel.empty() - innerdistance_pdf = channel.empty() - innerdistance_rscript = channel.empty() - 
if ('inner_distance' in rseqc_modules) { RSEQC_INNERDISTANCE(bam, bed) - innerdistance_distance = RSEQC_INNERDISTANCE.out.distance - innerdistance_freq = RSEQC_INNERDISTANCE.out.freq - innerdistance_mean = RSEQC_INNERDISTANCE.out.mean - innerdistance_pdf = RSEQC_INNERDISTANCE.out.pdf - innerdistance_rscript = RSEQC_INNERDISTANCE.out.rscript - innerdistance_all = innerdistance_distance.mix(innerdistance_freq, innerdistance_mean, innerdistance_pdf, innerdistance_rscript) + ch_inner_distance = RSEQC_INNERDISTANCE.out.map { r -> [r.meta, r] } } - // - // Run RSeQC infer_experiment.py - // - inferexperiment_txt = channel.empty() if ('infer_experiment' in rseqc_modules) { RSEQC_INFEREXPERIMENT(bam, bed) - inferexperiment_txt = RSEQC_INFEREXPERIMENT.out.txt + inferexperiment_txt = RSEQC_INFEREXPERIMENT.out.map { r -> [r.meta, r.txt] } } - // - // Run RSeQC junction_annotation.py - // - junctionannotation_all = channel.empty() - junctionannotation_bed = channel.empty() - junctionannotation_interact_bed = channel.empty() - junctionannotation_xls = channel.empty() - junctionannotation_pdf = channel.empty() - junctionannotation_events_pdf = channel.empty() - junctionannotation_rscript = channel.empty() - junctionannotation_log = channel.empty() - if ('junction_annotation' in rseqc_modules) { RSEQC_JUNCTIONANNOTATION(bam, bed) - junctionannotation_bed = RSEQC_JUNCTIONANNOTATION.out.bed - junctionannotation_interact_bed = RSEQC_JUNCTIONANNOTATION.out.interact_bed - junctionannotation_xls = RSEQC_JUNCTIONANNOTATION.out.xls - junctionannotation_pdf = RSEQC_JUNCTIONANNOTATION.out.pdf - junctionannotation_events_pdf = RSEQC_JUNCTIONANNOTATION.out.events_pdf - junctionannotation_rscript = RSEQC_JUNCTIONANNOTATION.out.rscript - junctionannotation_log = RSEQC_JUNCTIONANNOTATION.out.log - junctionannotation_all = junctionannotation_bed.mix(junctionannotation_interact_bed, junctionannotation_xls, junctionannotation_pdf, junctionannotation_events_pdf, junctionannotation_rscript, 
junctionannotation_log) + ch_junction_annotation = RSEQC_JUNCTIONANNOTATION.out.map { r -> [r.meta, r] } } - // - // Run RSeQC junction_saturation.py - // - junctionsaturation_all = channel.empty() - junctionsaturation_pdf = channel.empty() - junctionsaturation_rscript = channel.empty() - if ('junction_saturation' in rseqc_modules) { RSEQC_JUNCTIONSATURATION(bam, bed) - junctionsaturation_pdf = RSEQC_JUNCTIONSATURATION.out.pdf - junctionsaturation_rscript = RSEQC_JUNCTIONSATURATION.out.rscript - junctionsaturation_all = junctionsaturation_pdf.mix(junctionsaturation_rscript) + junctionsaturation_pdf = RSEQC_JUNCTIONSATURATION.out.map { r -> [r.meta, r.pdf] } + junctionsaturation_r = RSEQC_JUNCTIONSATURATION.out.map { r -> [r.meta, r.rscript] } } - // - // Run RSeQC read_distribution.py - // - readdistribution_txt = channel.empty() - if ('read_distribution' in rseqc_modules) { RSEQC_READDISTRIBUTION(bam, bed) - readdistribution_txt = RSEQC_READDISTRIBUTION.out.txt + readdistribution_txt = RSEQC_READDISTRIBUTION.out.map { r -> [r.meta, r.txt] } } - // - // Run RSeQC read_duplication.py - // - readduplication_all = channel.empty() - readduplication_seq_xls = channel.empty() - readduplication_pos_xls = channel.empty() - readduplication_pdf = channel.empty() - readduplication_rscript = channel.empty() - if ('read_duplication' in rseqc_modules) { - RSEQC_READDUPLICATION(bam ) - readduplication_seq_xls = RSEQC_READDUPLICATION.out.seq_xls - readduplication_pos_xls = RSEQC_READDUPLICATION.out.pos_xls - readduplication_pdf = RSEQC_READDUPLICATION.out.pdf - readduplication_rscript = RSEQC_READDUPLICATION.out.rscript - readduplication_all = readduplication_seq_xls.mix(readduplication_pos_xls, readduplication_pdf, readduplication_rscript) + RSEQC_READDUPLICATION(bam) + ch_read_duplication = RSEQC_READDUPLICATION.out.map { r -> [r.meta, r] } } - // - // Run RSeQC tin.py - // - tin_txt = channel.empty() - if ('tin' in rseqc_modules) { RSEQC_TIN(bam, bed) - tin_txt = 
RSEQC_TIN.out.txt + tin_txt = RSEQC_TIN.out.map { r -> [r.meta, r.txt] } } + // Join all per-module outputs by meta, using bam as the driver channel + // (ensures all samples are represented even when modules are skipped). + // Modules that weren't run produce null fields via remainder: true. emit: - bamstat_txt // channel: [ val(meta), txt ] - - innerdistance_all // channel: [ val(meta), {txt, pdf, r} ] - innerdistance_distance // channel: [ val(meta), txt ] - innerdistance_freq // channel: [ val(meta), txt ] - innerdistance_mean // channel: [ val(meta), txt ] - innerdistance_pdf // channel: [ val(meta), pdf ] - innerdistance_rscript // channel: [ val(meta), r ] - - inferexperiment_txt // channel: [ val(meta), txt ] - - junctionannotation_all // channel: [ val(meta), {bed, xls, pdf, r, log} ] - junctionannotation_bed // channel: [ val(meta), bed ] - junctionannotation_interact_bed // channel: [ val(meta), bed ] - junctionannotation_xls // channel: [ val(meta), xls ] - junctionannotation_pdf // channel: [ val(meta), pdf ] - junctionannotation_events_pdf // channel: [ val(meta), pdf ] - junctionannotation_rscript // channel: [ val(meta), r ] - junctionannotation_log // channel: [ val(meta), log ] - - junctionsaturation_all // channel: [ val(meta), {pdf, r} ] - junctionsaturation_pdf // channel: [ val(meta), pdf ] - junctionsaturation_rscript // channel: [ val(meta), r ] - - readdistribution_txt // channel: [ val(meta), txt ] - - readduplication_all // channel: [ val(meta), {xls, pdf, r} ] - readduplication_seq_xls // channel: [ val(meta), xls ] - readduplication_pos_xls // channel: [ val(meta), xls ] - readduplication_pdf // channel: [ val(meta), pdf ] - readduplication_rscript // channel: [ val(meta), r ] - - tin_txt // channel: [ val(meta), txt ] + result = bam + .map { meta, _b, _i -> [meta] } + .join(bamstat_txt, by: [0], remainder: true) + .join(inferexperiment_txt, by: [0], remainder: true) + .join(ch_junction_annotation, by: [0], remainder: true) + 
.join(junctionsaturation_pdf, by: [0], remainder: true) + .join(junctionsaturation_r, by: [0], remainder: true) + .join(ch_read_duplication, by: [0], remainder: true) + .join(readdistribution_txt, by: [0], remainder: true) + .join(ch_inner_distance, by: [0], remainder: true) + .join(tin_txt, by: [0], remainder: true) + .map { meta, bamstat, inferexp, junction_annot, juncsat_pdf, juncsat_r, read_dup, readdist, inner_dist, tin -> + record( + meta: meta, + bamstat: bamstat, inferexperiment: inferexp, + junction_annotation: junction_annot, + junctionsaturation_pdf: juncsat_pdf, junctionsaturation_r: juncsat_r, + read_duplication: read_dup, + readdistribution: readdist, + inner_distance: inner_dist, + tin: tin + ) + } + inferexperiment = inferexperiment_txt + multiqc_files = bamstat_txt + .mix(inferexperiment_txt) + .mix(ch_inner_distance.map { meta, r -> [meta, r.freq] }) + .mix(ch_junction_annotation.map { meta, r -> [meta, r.log] }) + .mix(junctionsaturation_r) + .mix(readdistribution_txt) + .mix(ch_read_duplication.map { meta, r -> [meta, r.pos_xls] }) + .mix(tin_txt) + .map { _meta, file -> file } } diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/main.nf b/subworkflows/nf-core/bam_sort_stats_samtools/main.nf index e45794941..07e86e5b1 100644 --- a/subworkflows/nf-core/bam_sort_stats_samtools/main.nf +++ b/subworkflows/nf-core/bam_sort_stats_samtools/main.nf @@ -1,3 +1,5 @@ +nextflow.preview.types = true + // // Sort, index BAM file and run samtools stats, flagstat and idxstats // @@ -6,6 +8,16 @@ include { SAMTOOLS_SORT } from '../../../modules/nf-core/samtools/sort/main include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' include { BAM_STATS_SAMTOOLS } from '../bam_stats_samtools/main' +record SamtoolsResult { + meta: Map + bam: Path + bai: Path? + csi: Path? 
+ stats: Path + flagstat: Path + idxstats: Path +} + workflow BAM_SORT_STATS_SAMTOOLS { take: ch_bam // channel: [ val(meta), [ bam ] ] @@ -17,12 +29,17 @@ workflow BAM_SORT_STATS_SAMTOOLS { SAMTOOLS_SORT ( ch_bam, ch_fasta, '' ) - SAMTOOLS_INDEX ( SAMTOOLS_SORT.out.bam ) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + // Extract bam from SamtoolsSortResult record + ch_sorted_bam = SAMTOOLS_SORT.out.map { r -> [r.meta, r.bam] } + + SAMTOOLS_INDEX ( ch_sorted_bam ) - SAMTOOLS_SORT.out.bam - .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true) - .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true) + ch_index_bai = SAMTOOLS_INDEX.out.map { r -> [r.meta, r.bai] } + ch_index_csi = SAMTOOLS_INDEX.out.map { r -> [r.meta, r.csi] } + + ch_sorted_bam + .join(ch_index_bai, by: [0], remainder: true) + .join(ch_index_csi, by: [0], remainder: true) .map { meta, bam, bai, csi -> if (bai) { @@ -36,14 +53,21 @@ workflow BAM_SORT_STATS_SAMTOOLS { BAM_STATS_SAMTOOLS ( ch_bam_bai, ch_fasta ) ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) + // Aggregate outputs from multiple processes into a single record channel + // using join (by meta key) + map to construct the record. 
emit: - bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ] - bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] - csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), [ csi ] ] - - stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - + result = ch_sorted_bam + .join(ch_index_bai, by: [0], remainder: true) + .join(ch_index_csi, by: [0], remainder: true) + .join(BAM_STATS_SAMTOOLS.out.stats, by: [0]) + .join(BAM_STATS_SAMTOOLS.out.flagstat, by: [0]) + .join(BAM_STATS_SAMTOOLS.out.idxstats, by: [0]) + .map { meta, bam, bai, csi, stats, flagstat, idxstats -> + record( + meta: meta, + bam: bam, bai: bai, csi: csi, + stats: stats, flagstat: flagstat, idxstats: idxstats + ) + } versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/nf-core/fastq_align_hisat2/main.nf b/subworkflows/nf-core/fastq_align_hisat2/main.nf index a7c2e6279..933830b0d 100644 --- a/subworkflows/nf-core/fastq_align_hisat2/main.nf +++ b/subworkflows/nf-core/fastq_align_hisat2/main.nf @@ -26,18 +26,20 @@ workflow FASTQ_ALIGN_HISAT2 { BAM_SORT_STATS_SAMTOOLS ( HISAT2_ALIGN.out.bam, ch_fasta ) ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) + // Extract individual fields from SamtoolsResult record + ch_samtools = BAM_SORT_STATS_SAMTOOLS.out.result emit: - orig_bam = HISAT2_ALIGN.out.bam // channel: [ val(meta), bam ] - summary = HISAT2_ALIGN.out.summary // channel: [ val(meta), log ] - fastq = HISAT2_ALIGN.out.fastq // channel: [ val(meta), fastq ] - - bam = BAM_SORT_STATS_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] - bai = BAM_SORT_STATS_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] - csi = BAM_SORT_STATS_SAMTOOLS.out.csi // channel: [ val(meta), [ csi ] ] - stats = BAM_SORT_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ 
stats ] ] - flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - - versions = ch_versions // channel: [ versions.yml ] + orig_bam = HISAT2_ALIGN.out.bam // channel: [ val(meta), bam ] + summary = HISAT2_ALIGN.out.summary // channel: [ val(meta), log ] + fastq = HISAT2_ALIGN.out.fastq // channel: [ val(meta), fastq ] + + bam = ch_samtools.map { r -> [r.meta, r.bam] } // channel: [ val(meta), [ bam ] ] + bai = ch_samtools.map { r -> [r.meta, r.bai] } // channel: [ val(meta), [ bai ] ] + csi = ch_samtools.map { r -> [r.meta, r.csi] } // channel: [ val(meta), [ csi ] ] + stats = ch_samtools.map { r -> [r.meta, r.stats] } // channel: [ val(meta), [ stats ] ] + flagstat = ch_samtools.map { r -> [r.meta, r.flagstat] } // channel: [ val(meta), [ flagstat ] ] + idxstats = ch_samtools.map { r -> [r.meta, r.idxstats] } // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/nf-core/quantify_pseudo_alignment/main.nf b/subworkflows/nf-core/quantify_pseudo_alignment/main.nf index 7288b541e..53e230106 100644 --- a/subworkflows/nf-core/quantify_pseudo_alignment/main.nf +++ b/subworkflows/nf-core/quantify_pseudo_alignment/main.nf @@ -1,3 +1,5 @@ +nextflow.preview.types = true + // // Pseudoalignment and quantification with Salmon or Kallisto // @@ -10,6 +12,22 @@ include { TXIMETA_TXIMPORT } from '../../../modules/nf-core/tximeta/tximport' include { SUMMARIZEDEXPERIMENT_SUMMARIZEDEXPERIMENT as SE_GENE_UNIFIED } from '../../../modules/nf-core/summarizedexperiment/summarizedexperiment' include { SUMMARIZEDEXPERIMENT_SUMMARIZEDEXPERIMENT as SE_TRANSCRIPT_UNIFIED } from '../../../modules/nf-core/summarizedexperiment/summarizedexperiment' +record QuantResult { + meta: Map + results: Path? + tx2gene: Path? + counts_gene: Path? + counts_gene_length_scaled: Path? + counts_gene_scaled: Path? 
+ counts_transcript: Path? + lengths_gene: Path? + lengths_transcript: Path? + tpm_gene: Path? + tpm_transcript: Path? + merged_gene_rds: Path? + merged_transcript_rds: Path? +} + workflow QUANTIFY_PSEUDO_ALIGNMENT { take: samplesheet // channel: [ val(meta), /path/to/samplsheet ] @@ -52,9 +70,8 @@ workflow QUANTIFY_PSEUDO_ALIGNMENT { kallisto_quant_fraglen, kallisto_quant_fraglen_sd ) - ch_pseudo_results = KALLISTO_QUANT.out.results - ch_pseudo_multiqc = KALLISTO_QUANT.out.log - ch_versions = ch_versions.mix(KALLISTO_QUANT.out.versions.first()) + ch_pseudo_results = KALLISTO_QUANT.out.map { r -> [r.meta, r.results] } + ch_pseudo_multiqc = KALLISTO_QUANT.out.map { r -> [r.meta, r.log] } } CUSTOM_TX2GENE ( @@ -71,14 +88,10 @@ workflow QUANTIFY_PSEUDO_ALIGNMENT { CUSTOM_TX2GENE.out.tx2gene, pseudo_aligner ) - ch_versions = ch_versions.mix(TXIMETA_TXIMPORT.out.versions) - ch_gene_unified = TXIMETA_TXIMPORT.out.counts_gene - .join(TXIMETA_TXIMPORT.out.counts_gene_length_scaled) - .join(TXIMETA_TXIMPORT.out.counts_gene_scaled) - .join(TXIMETA_TXIMPORT.out.lengths_gene) - .join(TXIMETA_TXIMPORT.out.tpm_gene) - .map{ row -> tuple(row[0], row.tail()) } + // Extract fields directly from TximportResult record (replaces 5 join() calls) + ch_gene_unified = TXIMETA_TXIMPORT.out + .map { r -> tuple(r.meta, [r.counts_gene, r.counts_gene_length_scaled, r.counts_gene_scaled, r.lengths_gene, r.tpm_gene]) } SE_GENE_UNIFIED ( ch_gene_unified, @@ -87,10 +100,8 @@ workflow QUANTIFY_PSEUDO_ALIGNMENT { ) ch_versions = ch_versions.mix(SE_GENE_UNIFIED.out.versions) - ch_transcript_unified = TXIMETA_TXIMPORT.out.counts_transcript - .join(TXIMETA_TXIMPORT.out.lengths_transcript) - .join(TXIMETA_TXIMPORT.out.tpm_transcript) - .map{ row -> tuple(row[0], row.tail()) } + ch_transcript_unified = TXIMETA_TXIMPORT.out + .map { r -> tuple(r.meta, [r.counts_transcript, r.lengths_transcript, r.tpm_transcript]) } SE_TRANSCRIPT_UNIFIED ( ch_transcript_unified, @@ -99,22 +110,28 @@ workflow 
QUANTIFY_PSEUDO_ALIGNMENT { ) ch_versions = ch_versions.mix(SE_TRANSCRIPT_UNIFIED.out.versions) - emit: - results = ch_pseudo_results // channel: [ val(meta), results_dir ] - multiqc = ch_pseudo_multiqc // channel: [ val(meta), files_for_multiqc ] - tx2gene = CUSTOM_TX2GENE.out.tx2gene // channel: [ val(meta), tx2gene.tsv ] + // Combine per-sample results with pipeline-wide aggregate outputs. + // The TximportResult record lets us combine() once instead of 8 separate times. + ch_tximport = TXIMETA_TXIMPORT.out - tpm_gene = TXIMETA_TXIMPORT.out.tpm_gene // path: *gene_tpm.tsv - counts_gene = TXIMETA_TXIMPORT.out.counts_gene // path: *gene_counts.tsv - lengths_gene = TXIMETA_TXIMPORT.out.lengths_gene // path: *gene_lengths.tsv - counts_gene_length_scaled = TXIMETA_TXIMPORT.out.counts_gene_length_scaled // path: *gene_counts_length_scaled.tsv - counts_gene_scaled = TXIMETA_TXIMPORT.out.counts_gene_scaled // path: *gene_counts_scaled.tsv - tpm_transcript = TXIMETA_TXIMPORT.out.tpm_transcript // path: *gene_tpm.tsv - counts_transcript = TXIMETA_TXIMPORT.out.counts_transcript // path: *transcript_counts.tsv - lengths_transcript = TXIMETA_TXIMPORT.out.lengths_transcript // path: *transcript_lengths.tsv - - merged_gene_rds_unified = SE_GENE_UNIFIED.out.rds // path: *.rds - merged_transcript_rds_unified = SE_TRANSCRIPT_UNIFIED.out.rds // path: *.rds - - versions = ch_versions // channel: [ versions.yml ] + emit: + result = ch_pseudo_results + .combine(CUSTOM_TX2GENE.out.tx2gene.map { _meta, path -> path }) + .combine(ch_tximport) + .combine(SE_GENE_UNIFIED.out.rds.map { _meta, path -> path }) + .combine(SE_TRANSCRIPT_UNIFIED.out.rds.map { _meta, path -> path }) + .map { meta, results, tx2gene, txi, rds_g, rds_t -> + record( + meta: meta, + results: results, tx2gene: tx2gene, + counts_gene: txi.counts_gene, counts_gene_length_scaled: txi.counts_gene_length_scaled, + counts_gene_scaled: txi.counts_gene_scaled, counts_transcript: txi.counts_transcript, + lengths_gene: 
txi.lengths_gene, lengths_transcript: txi.lengths_transcript, + tpm_gene: txi.tpm_gene, tpm_transcript: txi.tpm_transcript, + merged_gene_rds: rds_g, merged_transcript_rds: rds_t + ) + } + multiqc = ch_pseudo_multiqc + counts_gene_length_scaled = ch_tximport.map { r -> [r.meta, r.counts_gene_length_scaled] } + versions = ch_versions } diff --git a/workflows/rnaseq/main.nf b/workflows/rnaseq/main.nf index f42b279b0..0ae660ec1 100755 --- a/workflows/rnaseq/main.nf +++ b/workflows/rnaseq/main.nf @@ -109,6 +109,24 @@ workflow RNASEQ { ch_percent_mapped = channel.empty() // Channel initializations for workflow outputs + // Records replace groups of related channel.empty() declarations + ch_markdup_result = channel.empty() // MarkDupResult record + ch_dupradar_result = channel.empty() // DupRadarResult record + ch_rseqc_result = channel.empty() // RSeQCResult record + ch_deseq2_result = channel.empty() // DeSeq2Result record + ch_pseudo_deseq2_result = channel.empty() // DeSeq2Result record (pseudo-aligner) + ch_kraken_report = channel.empty() + ch_bracken_txt = channel.empty() + ch_sylph_profile = channel.empty() + ch_sylphtax_output = channel.empty() + ch_pseudo_result = channel.empty() // QuantResult record + ch_rsem_result = channel.empty() // RsemResult record + ch_star_salmon_result = channel.empty() // QuantResult record + ch_star_result = channel.empty() // StarAlignResult record + ch_samtools_result = channel.empty() // SamtoolsResult record + ch_umi_result = channel.empty() // UmiDedupResult record + + // Individual channels that don't fit into record groups ch_stringtie_gtf = channel.empty() ch_stringtie_coverage = channel.empty() ch_stringtie_abundance = channel.empty() @@ -120,117 +138,9 @@ workflow RNASEQ { ch_bigwig_reverse = channel.empty() ch_preseq_txt = channel.empty() ch_preseq_log = channel.empty() - ch_markdup_bam = channel.empty() - ch_markdup_bai = channel.empty() - ch_markdup_metrics = channel.empty() - ch_markdup_stats = channel.empty() - 
ch_markdup_flagstat = channel.empty() - ch_markdup_idxstats = channel.empty() ch_qualimap_results = channel.empty() - ch_dupradar_scatter = channel.empty() - ch_dupradar_boxplot = channel.empty() - ch_dupradar_histogram = channel.empty() - ch_dupradar_gene_data = channel.empty() - ch_dupradar_intercept = channel.empty() - ch_rseqc_bamstat = channel.empty() - ch_rseqc_inferexperiment = channel.empty() - ch_rseqc_junctionannotation_bed = channel.empty() - ch_rseqc_junctionannotation_xls = channel.empty() - ch_rseqc_junctionannotation_log = channel.empty() - ch_rseqc_junctionannotation_pdf = channel.empty() - ch_rseqc_junctionannotation_events_pdf = channel.empty() - ch_rseqc_junctionannotation_r = channel.empty() - ch_rseqc_junctionsaturation_pdf = channel.empty() - ch_rseqc_junctionsaturation_r = channel.empty() - ch_rseqc_readduplication_pos_xls = channel.empty() - ch_rseqc_readduplication_seq_xls = channel.empty() - ch_rseqc_readduplication_pdf = channel.empty() - ch_rseqc_readduplication_r = channel.empty() - ch_rseqc_readdistribution = channel.empty() - ch_rseqc_innerdistance_txt = channel.empty() - ch_rseqc_innerdistance_distance = channel.empty() - ch_rseqc_innerdistance_mean = channel.empty() - ch_rseqc_innerdistance_pdf = channel.empty() - ch_rseqc_innerdistance_r = channel.empty() - ch_rseqc_tin = channel.empty() - ch_rseqc_junctionannotation_interact_bed = channel.empty() - ch_kraken_report = channel.empty() - ch_bracken_txt = channel.empty() - ch_sylph_profile = channel.empty() - ch_sylphtax_output = channel.empty() - ch_pseudo_quant = channel.empty() - ch_pseudo_tx2gene = channel.empty() - ch_pseudo_counts_gene = channel.empty() - ch_pseudo_counts_gene_length_scaled = channel.empty() - ch_pseudo_counts_gene_scaled = channel.empty() - ch_pseudo_counts_transcript = channel.empty() - ch_pseudo_lengths_gene = channel.empty() - ch_pseudo_lengths_transcript = channel.empty() - ch_pseudo_tpm_gene = channel.empty() - ch_pseudo_tpm_transcript = channel.empty() - 
ch_pseudo_merged_gene_rds = channel.empty() - ch_pseudo_merged_transcript_rds = channel.empty() - ch_rsem_stat = channel.empty() - ch_rsem_logs = channel.empty() - ch_rsem_counts_gene = channel.empty() - ch_rsem_counts_transcript = channel.empty() - ch_rsem_tpm_gene = channel.empty() - ch_rsem_tpm_transcript = channel.empty() - ch_rsem_merged_counts_gene = channel.empty() - ch_rsem_merged_counts_transcript = channel.empty() - ch_rsem_merged_genes_long = channel.empty() - ch_rsem_merged_isoforms_long = channel.empty() - ch_star_salmon_quant = channel.empty() - ch_star_salmon_tx2gene = channel.empty() - ch_star_salmon_counts_gene = channel.empty() - ch_star_salmon_counts_gene_length_scaled = channel.empty() - ch_star_salmon_counts_gene_scaled = channel.empty() - ch_star_salmon_counts_transcript = channel.empty() - ch_star_salmon_lengths_gene = channel.empty() - ch_star_salmon_lengths_transcript = channel.empty() - ch_star_salmon_tpm_gene = channel.empty() - ch_star_salmon_tpm_transcript = channel.empty() - ch_star_salmon_merged_gene_rds = channel.empty() - ch_star_salmon_merged_transcript_rds = channel.empty() - ch_deseq2_pca = channel.empty() - ch_deseq2_dists = channel.empty() - ch_deseq2_pdf = channel.empty() - ch_deseq2_rdata = channel.empty() - ch_deseq2_pca_txt = channel.empty() - ch_deseq2_dists_txt = channel.empty() - ch_deseq2_log = channel.empty() - ch_deseq2_size_factors = channel.empty() - // Separate channels for pseudo-aligner DESeq2 QC (goes to different output path) - ch_pseudo_deseq2_pca = channel.empty() - ch_pseudo_deseq2_dists = channel.empty() - ch_pseudo_deseq2_pdf = channel.empty() - ch_pseudo_deseq2_rdata = channel.empty() - ch_pseudo_deseq2_pca_txt = channel.empty() - ch_pseudo_deseq2_dists_txt = channel.empty() - ch_pseudo_deseq2_log = channel.empty() - ch_pseudo_deseq2_size_factors = channel.empty() ch_hisat2_summary = channel.empty() - ch_star_bam = channel.empty() - ch_star_bai = channel.empty() - ch_sorted_bam_stats = channel.empty() - 
ch_sorted_bam_flagstat = channel.empty() - ch_sorted_bam_idxstats = channel.empty() ch_transcriptome_bam_out = channel.empty() - ch_umi_genomic_dedup_log = channel.empty() - ch_umi_transcriptomic_dedup_log = channel.empty() - ch_umi_prepare_for_rsem_log = channel.empty() - ch_umi_transcriptome_dedup_bam = channel.empty() - ch_umi_transcriptome_sorted_bam = channel.empty() - ch_umi_transcriptome_sorted_bam_bai = channel.empty() - ch_umi_transcriptome_filtered_bam = channel.empty() - ch_umi_dedup_stats = channel.empty() - ch_umi_dedup_bam = channel.empty() - ch_umi_dedup_bai = channel.empty() - ch_umi_dedup_flagstat = channel.empty() - ch_umi_dedup_idxstats = channel.empty() - ch_umi_dedup_tsv_edit_distance = channel.empty() - ch_umi_dedup_tsv_per_umi = channel.empty() - ch_umi_dedup_tsv_umi_per_position = channel.empty() ch_samtools_bai = channel.empty() // @@ -295,9 +205,11 @@ workflow RNASEQ { SAMTOOLS_INDEX ( ch_genome_bam ) - ch_genome_bam_index = params.bam_csi_index ? SAMTOOLS_INDEX.out.csi : SAMTOOLS_INDEX.out.bai - ch_samtools_bai = params.bam_csi_index ? SAMTOOLS_INDEX.out.csi : SAMTOOLS_INDEX.out.bai // For publishing input BAM indices - ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + // Extract index fields from SamtoolsIndexResult record + ch_index_bai = SAMTOOLS_INDEX.out.map { r -> [r.meta, r.bai] } + ch_index_csi = SAMTOOLS_INDEX.out.map { r -> [r.meta, r.csi] } + ch_genome_bam_index = params.bam_csi_index ? ch_index_csi : ch_index_bai + ch_samtools_bai = params.bam_csi_index ? 
ch_index_csi : ch_index_bai // For publishing input BAM indices // // Run RNA-seq FASTQ preprocessing subworkflow @@ -380,10 +292,6 @@ workflow RNASEQ { // // SUBWORKFLOW: Alignment with STAR and gene/transcript quantification with Salmon // - ch_star_log = channel.empty() - ch_star_log_out = channel.empty() - ch_star_log_progress = channel.empty() - ch_star_tab = channel.empty() ch_unaligned_sequences = channel.empty() if (!params.skip_alignment && (params.aligner == 'star_salmon' || params.aligner == 'star_rsem')) { @@ -421,32 +329,12 @@ workflow RNASEQ { ch_transcriptome_bam = ALIGN_STAR.out.bam_transcript ch_transcriptome_bam_out = ALIGN_STAR.out.bam_transcript ch_percent_mapped = ch_percent_mapped.mix(ALIGN_STAR.out.percent_mapped) - ch_unprocessed_bams = ALIGN_STAR.out.orig_bam.join(ALIGN_STAR.out.bam_transcript) - ch_star_log = ALIGN_STAR.out.log_final - ch_star_log_out = ALIGN_STAR.out.log_out - ch_star_log_progress = ALIGN_STAR.out.log_progress - ch_star_tab = ALIGN_STAR.out.tab - ch_star_bam = ALIGN_STAR.out.bam - ch_star_bai = ALIGN_STAR.out.bai - ch_sorted_bam_stats = ALIGN_STAR.out.stats - ch_sorted_bam_flagstat = ALIGN_STAR.out.flagstat - ch_sorted_bam_idxstats = ALIGN_STAR.out.idxstats - ch_unaligned_sequences = ALIGN_STAR.out.fastq - ch_umi_genomic_dedup_log = ch_umi_genomic_dedup_log.mix(ALIGN_STAR.out.umi_genomic_dedup_log) - ch_umi_transcriptomic_dedup_log = ch_umi_transcriptomic_dedup_log.mix(ALIGN_STAR.out.umi_transcriptomic_dedup_log) - ch_umi_prepare_for_rsem_log = ch_umi_prepare_for_rsem_log.mix(ALIGN_STAR.out.umi_prepare_for_rsem_log) - ch_umi_transcriptome_dedup_bam = ch_umi_transcriptome_dedup_bam.mix(ALIGN_STAR.out.umi_transcriptome_dedup_bam) - ch_umi_transcriptome_sorted_bam = ch_umi_transcriptome_sorted_bam.mix(ALIGN_STAR.out.umi_transcriptome_sorted_bam) - ch_umi_transcriptome_sorted_bam_bai = ch_umi_transcriptome_sorted_bam_bai.mix(ALIGN_STAR.out.umi_transcriptome_sorted_bam_bai) - ch_umi_transcriptome_filtered_bam = 
ch_umi_transcriptome_filtered_bam.mix(ALIGN_STAR.out.umi_transcriptome_filtered_bam) - ch_umi_dedup_stats = ch_umi_dedup_stats.mix(ALIGN_STAR.out.umi_dedup_stats) - ch_umi_dedup_bam = ch_umi_dedup_bam.mix(ALIGN_STAR.out.umi_dedup_bam) - ch_umi_dedup_bai = ch_umi_dedup_bai.mix(ALIGN_STAR.out.umi_dedup_bai) - ch_umi_dedup_flagstat = ch_umi_dedup_flagstat.mix(ALIGN_STAR.out.umi_dedup_flagstat) - ch_umi_dedup_idxstats = ch_umi_dedup_idxstats.mix(ALIGN_STAR.out.umi_dedup_idxstats) - ch_umi_dedup_tsv_edit_distance = ch_umi_dedup_tsv_edit_distance.mix(ALIGN_STAR.out.umi_dedup_tsv_edit_distance) - ch_umi_dedup_tsv_per_umi = ch_umi_dedup_tsv_per_umi.mix(ALIGN_STAR.out.umi_dedup_tsv_per_umi) - ch_umi_dedup_tsv_umi_per_position = ch_umi_dedup_tsv_umi_per_position.mix(ALIGN_STAR.out.umi_dedup_tsv_umi_per_position) + ch_star_result = ALIGN_STAR.out.star // StarAlignResult record + ch_samtools_result = ALIGN_STAR.out.samtools // SamtoolsResult record + ch_umi_result = ALIGN_STAR.out.umi // UmiDedupResult record + ch_unprocessed_bams = ch_star_result.map { r -> [r.meta, r.bam] } + .join(ch_star_result.map { r -> [r.meta, r.bam_transcript] }) + ch_unaligned_sequences = ch_star_result.map { r -> [r.meta, r.fastq] } ch_multiqc_files = ch_multiqc_files.mix(ALIGN_STAR.out.multiqc_files) ch_versions = ch_versions.mix(ALIGN_STAR.out.versions) } @@ -458,16 +346,7 @@ workflow RNASEQ { ch_rsem_index, params.use_sentieon_star ) - ch_rsem_stat = QUANTIFY_RSEM.out.stat - ch_rsem_logs = QUANTIFY_RSEM.out.logs - ch_rsem_counts_gene = QUANTIFY_RSEM.out.counts_gene - ch_rsem_counts_transcript = QUANTIFY_RSEM.out.counts_transcript - ch_rsem_tpm_gene = QUANTIFY_RSEM.out.merged_tpm_gene - ch_rsem_tpm_transcript = QUANTIFY_RSEM.out.merged_tpm_transcript - ch_rsem_merged_counts_gene = QUANTIFY_RSEM.out.merged_counts_gene - ch_rsem_merged_counts_transcript = QUANTIFY_RSEM.out.merged_counts_transcript - ch_rsem_merged_genes_long = QUANTIFY_RSEM.out.merged_genes_long - ch_rsem_merged_isoforms_long = 
QUANTIFY_RSEM.out.merged_isoforms_long + ch_rsem_result = QUANTIFY_RSEM.out.result ch_multiqc_files = ch_multiqc_files.mix(QUANTIFY_RSEM.out.stat.collect{ tuple -> tuple[1] }) ch_versions = ch_versions.mix(QUANTIFY_RSEM.out.versions) @@ -477,17 +356,9 @@ workflow RNASEQ { ch_pca_header_multiqc, ch_clustering_header_multiqc ) - ch_deseq2_pca = ch_deseq2_pca.mix(DESEQ2_QC_RSEM.out.pca_multiqc) - ch_deseq2_dists = ch_deseq2_dists.mix(DESEQ2_QC_RSEM.out.dists_multiqc) - ch_deseq2_pdf = ch_deseq2_pdf.mix(DESEQ2_QC_RSEM.out.pdf) - ch_deseq2_rdata = ch_deseq2_rdata.mix(DESEQ2_QC_RSEM.out.rdata) - ch_deseq2_pca_txt = ch_deseq2_pca_txt.mix(DESEQ2_QC_RSEM.out.pca_txt) - ch_deseq2_dists_txt = ch_deseq2_dists_txt.mix(DESEQ2_QC_RSEM.out.dists_txt) - ch_deseq2_log = ch_deseq2_log.mix(DESEQ2_QC_RSEM.out.log) - ch_deseq2_size_factors = ch_deseq2_size_factors.mix(DESEQ2_QC_RSEM.out.size_factors) - ch_multiqc_files = ch_multiqc_files.mix(DESEQ2_QC_RSEM.out.pca_multiqc.collect()) - ch_multiqc_files = ch_multiqc_files.mix(DESEQ2_QC_RSEM.out.dists_multiqc.collect()) - ch_versions = ch_versions.mix(DESEQ2_QC_RSEM.out.versions) + ch_deseq2_result = DESEQ2_QC_RSEM.out + ch_multiqc_files = ch_multiqc_files.mix(DESEQ2_QC_RSEM.out.map { it.pca_multiqc }.collect()) + ch_multiqc_files = ch_multiqc_files.mix(DESEQ2_QC_RSEM.out.map { it.dists_multiqc }.collect()) } } else if (params.aligner == 'star_salmon') { @@ -509,18 +380,7 @@ workflow RNASEQ { params.kallisto_quant_fraglen, params.kallisto_quant_fraglen_sd ) - ch_star_salmon_quant = QUANTIFY_STAR_SALMON.out.results - ch_star_salmon_tx2gene = QUANTIFY_STAR_SALMON.out.tx2gene - ch_star_salmon_counts_gene = QUANTIFY_STAR_SALMON.out.counts_gene - ch_star_salmon_counts_gene_length_scaled = QUANTIFY_STAR_SALMON.out.counts_gene_length_scaled - ch_star_salmon_counts_gene_scaled = QUANTIFY_STAR_SALMON.out.counts_gene_scaled - ch_star_salmon_counts_transcript = QUANTIFY_STAR_SALMON.out.counts_transcript - ch_star_salmon_lengths_gene = 
QUANTIFY_STAR_SALMON.out.lengths_gene - ch_star_salmon_lengths_transcript = QUANTIFY_STAR_SALMON.out.lengths_transcript - ch_star_salmon_tpm_gene = QUANTIFY_STAR_SALMON.out.tpm_gene - ch_star_salmon_tpm_transcript = QUANTIFY_STAR_SALMON.out.tpm_transcript - ch_star_salmon_merged_gene_rds = QUANTIFY_STAR_SALMON.out.merged_gene_rds_unified - ch_star_salmon_merged_transcript_rds = QUANTIFY_STAR_SALMON.out.merged_transcript_rds_unified + ch_star_salmon_result = QUANTIFY_STAR_SALMON.out.result ch_versions = ch_versions.mix(QUANTIFY_STAR_SALMON.out.versions) if (!params.skip_qc & !params.skip_deseq2_qc) { @@ -529,17 +389,9 @@ workflow RNASEQ { ch_pca_header_multiqc, ch_clustering_header_multiqc ) - ch_deseq2_pca = ch_deseq2_pca.mix(DESEQ2_QC_STAR_SALMON.out.pca_multiqc) - ch_deseq2_dists = ch_deseq2_dists.mix(DESEQ2_QC_STAR_SALMON.out.dists_multiqc) - ch_deseq2_pdf = ch_deseq2_pdf.mix(DESEQ2_QC_STAR_SALMON.out.pdf) - ch_deseq2_rdata = ch_deseq2_rdata.mix(DESEQ2_QC_STAR_SALMON.out.rdata) - ch_deseq2_pca_txt = ch_deseq2_pca_txt.mix(DESEQ2_QC_STAR_SALMON.out.pca_txt) - ch_deseq2_dists_txt = ch_deseq2_dists_txt.mix(DESEQ2_QC_STAR_SALMON.out.dists_txt) - ch_deseq2_log = ch_deseq2_log.mix(DESEQ2_QC_STAR_SALMON.out.log) - ch_deseq2_size_factors = ch_deseq2_size_factors.mix(DESEQ2_QC_STAR_SALMON.out.size_factors) - ch_multiqc_files = ch_multiqc_files.mix(DESEQ2_QC_STAR_SALMON.out.pca_multiqc.collect()) - ch_multiqc_files = ch_multiqc_files.mix(DESEQ2_QC_STAR_SALMON.out.dists_multiqc.collect()) - ch_versions = ch_versions.mix(DESEQ2_QC_STAR_SALMON.out.versions) + ch_deseq2_result = DESEQ2_QC_STAR_SALMON.out + ch_multiqc_files = ch_multiqc_files.mix(DESEQ2_QC_STAR_SALMON.out.map { it.pca_multiqc }.collect()) + ch_multiqc_files = ch_multiqc_files.mix(DESEQ2_QC_STAR_SALMON.out.map { it.dists_multiqc }.collect()) } } @@ -567,24 +419,8 @@ workflow RNASEQ { ch_unprocessed_bams = ALIGN_HISAT2.out.orig_bam.map { meta, bam -> [ meta, bam, '' ] } ch_unaligned_sequences = 
ALIGN_HISAT2.out.unaligned ch_hisat2_summary = ALIGN_HISAT2.out.summary - ch_sorted_bam_stats = ch_sorted_bam_stats.mix(ALIGN_HISAT2.out.stats) - ch_sorted_bam_flagstat = ch_sorted_bam_flagstat.mix(ALIGN_HISAT2.out.flagstat) - ch_sorted_bam_idxstats = ch_sorted_bam_idxstats.mix(ALIGN_HISAT2.out.idxstats) - ch_umi_genomic_dedup_log = ch_umi_genomic_dedup_log.mix(ALIGN_HISAT2.out.umi_genomic_dedup_log) - ch_umi_transcriptomic_dedup_log = ch_umi_transcriptomic_dedup_log.mix(ALIGN_HISAT2.out.umi_transcriptomic_dedup_log) - ch_umi_prepare_for_rsem_log = ch_umi_prepare_for_rsem_log.mix(ALIGN_HISAT2.out.umi_prepare_for_rsem_log) - ch_umi_transcriptome_dedup_bam = ch_umi_transcriptome_dedup_bam.mix(ALIGN_HISAT2.out.umi_transcriptome_dedup_bam) - ch_umi_transcriptome_sorted_bam = ch_umi_transcriptome_sorted_bam.mix(ALIGN_HISAT2.out.umi_transcriptome_sorted_bam) - ch_umi_transcriptome_sorted_bam_bai = ch_umi_transcriptome_sorted_bam_bai.mix(ALIGN_HISAT2.out.umi_transcriptome_sorted_bam_bai) - ch_umi_transcriptome_filtered_bam = ch_umi_transcriptome_filtered_bam.mix(ALIGN_HISAT2.out.umi_transcriptome_filtered_bam) - ch_umi_dedup_stats = ch_umi_dedup_stats.mix(ALIGN_HISAT2.out.umi_dedup_stats) - ch_umi_dedup_bam = ch_umi_dedup_bam.mix(ALIGN_HISAT2.out.umi_dedup_bam) - ch_umi_dedup_bai = ch_umi_dedup_bai.mix(ALIGN_HISAT2.out.umi_dedup_bai) - ch_umi_dedup_flagstat = ch_umi_dedup_flagstat.mix(ALIGN_HISAT2.out.umi_dedup_flagstat) - ch_umi_dedup_idxstats = ch_umi_dedup_idxstats.mix(ALIGN_HISAT2.out.umi_dedup_idxstats) - ch_umi_dedup_tsv_edit_distance = ch_umi_dedup_tsv_edit_distance.mix(ALIGN_HISAT2.out.umi_dedup_tsv_edit_distance) - ch_umi_dedup_tsv_per_umi = ch_umi_dedup_tsv_per_umi.mix(ALIGN_HISAT2.out.umi_dedup_tsv_per_umi) - ch_umi_dedup_tsv_umi_per_position = ch_umi_dedup_tsv_umi_per_position.mix(ALIGN_HISAT2.out.umi_dedup_tsv_umi_per_position) + ch_samtools_result = ALIGN_HISAT2.out.stats // HISAT2 still emits individual stats + ch_umi_result = 
ch_umi_result.mix(ALIGN_HISAT2.out.umi) ch_multiqc_files = ch_multiqc_files.mix(ALIGN_HISAT2.out.multiqc_files) ch_versions = ch_versions.mix(ALIGN_HISAT2.out.versions) } @@ -647,10 +483,9 @@ workflow RNASEQ { PRESEQ_LCEXTRAP ( ch_genome_bam ) - ch_preseq_txt = PRESEQ_LCEXTRAP.out.lc_extrap - ch_preseq_log = PRESEQ_LCEXTRAP.out.log - ch_multiqc_files = ch_multiqc_files.mix(PRESEQ_LCEXTRAP.out.lc_extrap.collect{ tuple -> tuple[1] }) - ch_versions = ch_versions.mix(PRESEQ_LCEXTRAP.out.versions.first()) + ch_preseq_txt = PRESEQ_LCEXTRAP.out.map { r -> [r.meta, r.lc_extrap] } + ch_preseq_log = PRESEQ_LCEXTRAP.out.map { r -> [r.meta, r.log] } + ch_multiqc_files = ch_multiqc_files.mix(ch_preseq_txt.collect{ tuple -> tuple[1] }) } // @@ -663,13 +498,8 @@ workflow RNASEQ { ch_fai.map { item -> [ [:], item ] } ) ch_genome_bam = BAM_MARKDUPLICATES_PICARD.out.bam - ch_genome_bam_index = params.bam_csi_index ? BAM_MARKDUPLICATES_PICARD.out.csi : BAM_MARKDUPLICATES_PICARD.out.bai - ch_markdup_bam = BAM_MARKDUPLICATES_PICARD.out.bam - ch_markdup_bai = params.bam_csi_index ? BAM_MARKDUPLICATES_PICARD.out.csi : BAM_MARKDUPLICATES_PICARD.out.bai - ch_markdup_metrics = BAM_MARKDUPLICATES_PICARD.out.metrics - ch_markdup_stats = BAM_MARKDUPLICATES_PICARD.out.stats - ch_markdup_flagstat = BAM_MARKDUPLICATES_PICARD.out.flagstat - ch_markdup_idxstats = BAM_MARKDUPLICATES_PICARD.out.idxstats + ch_genome_bam_index = params.bam_csi_index ? 
BAM_MARKDUPLICATES_PICARD.out.csi : BAM_MARKDUPLICATES_PICARD.out.result.map { r -> [r.meta, r.bai] } + ch_markdup_result = BAM_MARKDUPLICATES_PICARD.out.result ch_multiqc_files = ch_multiqc_files.mix(BAM_MARKDUPLICATES_PICARD.out.stats.collect{ tuple -> tuple[1] }) ch_multiqc_files = ch_multiqc_files.mix(BAM_MARKDUPLICATES_PICARD.out.flagstat.collect{ tuple -> tuple[1] }) ch_multiqc_files = ch_multiqc_files.mix(BAM_MARKDUPLICATES_PICARD.out.idxstats.collect{ tuple -> tuple[1] }) @@ -779,7 +609,7 @@ workflow RNASEQ { ) QUALIMAP_RNASEQ ( - SAMTOOLS_SORT_QUALIMAP.out.bam, + SAMTOOLS_SORT_QUALIMAP.out.map { r -> [r.meta, r.bam] }, ch_gtf.map { item -> [ [:], item ] } ) ch_qualimap_results = QUALIMAP_RNASEQ.out.results @@ -792,13 +622,8 @@ workflow RNASEQ { ch_genome_bam, ch_gtf.map { item -> [ [:], item ] } ) - ch_dupradar_scatter = DUPRADAR.out.scatter2d - ch_dupradar_boxplot = DUPRADAR.out.boxplot - ch_dupradar_histogram = DUPRADAR.out.hist - ch_dupradar_gene_data = DUPRADAR.out.dupmatrix - ch_dupradar_intercept = DUPRADAR.out.intercept_slope - ch_multiqc_files = ch_multiqc_files.mix(DUPRADAR.out.multiqc.collect{ tuple -> tuple[1] }) - ch_versions = ch_versions.mix(DUPRADAR.out.versions.first()) + ch_dupradar_result = DUPRADAR.out + ch_multiqc_files = ch_multiqc_files.mix(DUPRADAR.out.map { r -> [r.meta, r.multiqc] }.collect{ tuple -> tuple[1] }) } // Get RSeqC modules to run @@ -816,39 +641,11 @@ workflow RNASEQ { ch_gene_bed, rseqc_modules ) - ch_rseqc_bamstat = BAM_RSEQC.out.bamstat_txt - ch_rseqc_inferexperiment = BAM_RSEQC.out.inferexperiment_txt - ch_rseqc_junctionannotation_bed = BAM_RSEQC.out.junctionannotation_bed - ch_rseqc_junctionannotation_interact_bed = BAM_RSEQC.out.junctionannotation_interact_bed - ch_rseqc_junctionannotation_xls = BAM_RSEQC.out.junctionannotation_xls - ch_rseqc_junctionannotation_log = BAM_RSEQC.out.junctionannotation_log - ch_rseqc_junctionannotation_pdf = BAM_RSEQC.out.junctionannotation_pdf - 
ch_rseqc_junctionannotation_events_pdf = BAM_RSEQC.out.junctionannotation_events_pdf - ch_rseqc_junctionannotation_r = BAM_RSEQC.out.junctionannotation_rscript - ch_rseqc_junctionsaturation_pdf = BAM_RSEQC.out.junctionsaturation_pdf - ch_rseqc_junctionsaturation_r = BAM_RSEQC.out.junctionsaturation_rscript - ch_rseqc_readduplication_pos_xls = BAM_RSEQC.out.readduplication_pos_xls - ch_rseqc_readduplication_seq_xls = BAM_RSEQC.out.readduplication_seq_xls - ch_rseqc_readduplication_pdf = BAM_RSEQC.out.readduplication_pdf - ch_rseqc_readduplication_r = BAM_RSEQC.out.readduplication_rscript - ch_rseqc_readdistribution = BAM_RSEQC.out.readdistribution_txt - ch_rseqc_innerdistance_txt = BAM_RSEQC.out.innerdistance_freq - ch_rseqc_innerdistance_distance = BAM_RSEQC.out.innerdistance_distance - ch_rseqc_innerdistance_mean = BAM_RSEQC.out.innerdistance_mean - ch_rseqc_innerdistance_pdf = BAM_RSEQC.out.innerdistance_pdf - ch_rseqc_innerdistance_r = BAM_RSEQC.out.innerdistance_rscript - ch_rseqc_tin = BAM_RSEQC.out.tin_txt - ch_multiqc_files = ch_multiqc_files.mix(BAM_RSEQC.out.bamstat_txt.collect{ tuple -> tuple[1] }) - ch_multiqc_files = ch_multiqc_files.mix(BAM_RSEQC.out.inferexperiment_txt.collect{ tuple -> tuple[1] }) - ch_multiqc_files = ch_multiqc_files.mix(BAM_RSEQC.out.innerdistance_freq.collect{ tuple -> tuple[1] }) - ch_multiqc_files = ch_multiqc_files.mix(BAM_RSEQC.out.junctionannotation_log.collect{ tuple -> tuple[1] }) - ch_multiqc_files = ch_multiqc_files.mix(BAM_RSEQC.out.junctionsaturation_rscript.collect{ tuple -> tuple[1] }) - ch_multiqc_files = ch_multiqc_files.mix(BAM_RSEQC.out.readdistribution_txt.collect{ tuple -> tuple[1] }) - ch_multiqc_files = ch_multiqc_files.mix(BAM_RSEQC.out.readduplication_pos_xls.collect{ tuple -> tuple[1] }) - ch_multiqc_files = ch_multiqc_files.mix(BAM_RSEQC.out.tin_txt.collect{ tuple -> tuple[1] }) + ch_rseqc_result = BAM_RSEQC.out.result // RSeQCResult record (22 outputs -> 1) + ch_multiqc_files = 
ch_multiqc_files.mix(BAM_RSEQC.out.multiqc_files.collect()) // Compare predicted supplied or Salmon-predicted strand with what we get from RSeQC - ch_strand_comparison = BAM_RSEQC.out.inferexperiment_txt + ch_strand_comparison = BAM_RSEQC.out.inferexperiment .map { meta, strand_log -> def rseqc_inferred_strand = getInferexperimentStrandedness(strand_log, params.stranded_threshold, params.unstranded_threshold) @@ -978,18 +775,7 @@ workflow RNASEQ { params.kallisto_quant_fraglen_sd ) ch_counts_gene_length_scaled = QUANTIFY_PSEUDO_ALIGNMENT.out.counts_gene_length_scaled - ch_pseudo_quant = QUANTIFY_PSEUDO_ALIGNMENT.out.results - ch_pseudo_tx2gene = QUANTIFY_PSEUDO_ALIGNMENT.out.tx2gene - ch_pseudo_counts_gene = QUANTIFY_PSEUDO_ALIGNMENT.out.counts_gene - ch_pseudo_counts_gene_length_scaled = QUANTIFY_PSEUDO_ALIGNMENT.out.counts_gene_length_scaled - ch_pseudo_counts_gene_scaled = QUANTIFY_PSEUDO_ALIGNMENT.out.counts_gene_scaled - ch_pseudo_counts_transcript = QUANTIFY_PSEUDO_ALIGNMENT.out.counts_transcript - ch_pseudo_lengths_gene = QUANTIFY_PSEUDO_ALIGNMENT.out.lengths_gene - ch_pseudo_lengths_transcript = QUANTIFY_PSEUDO_ALIGNMENT.out.lengths_transcript - ch_pseudo_tpm_gene = QUANTIFY_PSEUDO_ALIGNMENT.out.tpm_gene - ch_pseudo_tpm_transcript = QUANTIFY_PSEUDO_ALIGNMENT.out.tpm_transcript - ch_pseudo_merged_gene_rds = QUANTIFY_PSEUDO_ALIGNMENT.out.merged_gene_rds_unified - ch_pseudo_merged_transcript_rds = QUANTIFY_PSEUDO_ALIGNMENT.out.merged_transcript_rds_unified + ch_pseudo_result = QUANTIFY_PSEUDO_ALIGNMENT.out.result ch_multiqc_files = ch_multiqc_files.mix(QUANTIFY_PSEUDO_ALIGNMENT.out.multiqc.collect{ tuple -> tuple[1] }) ch_versions = ch_versions.mix(QUANTIFY_PSEUDO_ALIGNMENT.out.versions) @@ -999,18 +785,9 @@ workflow RNASEQ { ch_pca_header_multiqc, ch_clustering_header_multiqc ) - // Use separate channels for pseudo-aligner DESeq2 (published to different path) - ch_pseudo_deseq2_pca = ch_pseudo_deseq2_pca.mix(DESEQ2_QC_PSEUDO.out.pca_multiqc) - 
ch_pseudo_deseq2_dists = ch_pseudo_deseq2_dists.mix(DESEQ2_QC_PSEUDO.out.dists_multiqc) - ch_pseudo_deseq2_pdf = ch_pseudo_deseq2_pdf.mix(DESEQ2_QC_PSEUDO.out.pdf) - ch_pseudo_deseq2_rdata = ch_pseudo_deseq2_rdata.mix(DESEQ2_QC_PSEUDO.out.rdata) - ch_pseudo_deseq2_pca_txt = ch_pseudo_deseq2_pca_txt.mix(DESEQ2_QC_PSEUDO.out.pca_txt) - ch_pseudo_deseq2_dists_txt = ch_pseudo_deseq2_dists_txt.mix(DESEQ2_QC_PSEUDO.out.dists_txt) - ch_pseudo_deseq2_log = ch_pseudo_deseq2_log.mix(DESEQ2_QC_PSEUDO.out.log) - ch_pseudo_deseq2_size_factors = ch_pseudo_deseq2_size_factors.mix(DESEQ2_QC_PSEUDO.out.size_factors) - ch_multiqc_files = ch_multiqc_files.mix(DESEQ2_QC_PSEUDO.out.pca_multiqc.collect()) - ch_multiqc_files = ch_multiqc_files.mix(DESEQ2_QC_PSEUDO.out.dists_multiqc.collect()) - ch_versions = ch_versions.mix(DESEQ2_QC_PSEUDO.out.versions) + ch_pseudo_deseq2_result = DESEQ2_QC_PSEUDO.out + ch_multiqc_files = ch_multiqc_files.mix(DESEQ2_QC_PSEUDO.out.map { it.pca_multiqc }.collect()) + ch_multiqc_files = ch_multiqc_files.mix(DESEQ2_QC_PSEUDO.out.map { it.dists_multiqc }.collect()) } } @@ -1156,21 +933,7 @@ workflow RNASEQ { trim_unpaired = ch_trim_unpaired umi_log = ch_umi_log umi_reads = ch_umi_reads - umi_genomic_dedup_log = ch_umi_genomic_dedup_log - umi_transcriptomic_dedup_log = ch_umi_transcriptomic_dedup_log - umi_prepare_for_rsem_log = ch_umi_prepare_for_rsem_log - umi_transcriptome_dedup_bam = ch_umi_transcriptome_dedup_bam - umi_transcriptome_sorted_bam = ch_umi_transcriptome_sorted_bam - umi_transcriptome_sorted_bam_bai = ch_umi_transcriptome_sorted_bam_bai - umi_transcriptome_filtered_bam = ch_umi_transcriptome_filtered_bam - umi_dedup_stats = ch_umi_dedup_stats - umi_dedup_bam = ch_umi_dedup_bam - umi_dedup_bai = ch_umi_dedup_bai - umi_dedup_flagstat = ch_umi_dedup_flagstat - umi_dedup_idxstats = ch_umi_dedup_idxstats - umi_dedup_tsv_edit_distance = ch_umi_dedup_tsv_edit_distance - umi_dedup_tsv_per_umi = ch_umi_dedup_tsv_per_umi - 
umi_dedup_tsv_umi_per_position = ch_umi_dedup_tsv_umi_per_position + umi_dedup = ch_umi_result // UmiDedupResult record (15 individual channels -> 1) lint_log_raw = ch_lint_log_raw lint_log_trimmed = ch_lint_log_trimmed lint_log_bbsplit = ch_lint_log_bbsplit @@ -1184,62 +947,25 @@ workflow RNASEQ { seqkit_prefixed = ch_seqkit_prefixed seqkit_converted = ch_seqkit_converted - // Alignment outputs - star_log = ch_star_log - star_log_out = ch_star_log_out - star_log_progress = ch_star_log_progress - star_tab = ch_star_tab - star_bam = ch_star_bam - star_bai = ch_star_bai - sorted_bam_stats = ch_sorted_bam_stats - sorted_bam_flagstat = ch_sorted_bam_flagstat - sorted_bam_idxstats = ch_sorted_bam_idxstats + // Alignment outputs (records replace 13 individual channels) + star = ch_star_result // StarAlignResult record (logs, BAMs, fastq, tab) + samtools = ch_samtools_result // SamtoolsResult record (sorted bam, stats) transcriptome_bam = ch_transcriptome_bam_out unaligned_sequences = ch_unaligned_sequences hisat2_summary = ch_hisat2_summary - samtools_bai = ch_samtools_bai // Input BAM indices for BAM input mode + samtools_bai = ch_samtools_bai - // MarkDuplicates outputs - markdup_bam = ch_markdup_bam - markdup_bai = ch_markdup_bai - markdup_metrics = ch_markdup_metrics - markdup_stats = ch_markdup_stats - markdup_flagstat = ch_markdup_flagstat - markdup_idxstats = ch_markdup_idxstats + // MarkDuplicates outputs (6 individual channels -> 1 record) + markdup = ch_markdup_result // QC outputs preseq_txt = ch_preseq_txt preseq_log = ch_preseq_log qualimap_results = ch_qualimap_results - dupradar_scatter = ch_dupradar_scatter - dupradar_boxplot = ch_dupradar_boxplot - dupradar_histogram = ch_dupradar_histogram - dupradar_gene_data = ch_dupradar_gene_data - dupradar_intercept = ch_dupradar_intercept - - // RSeQC outputs - rseqc_bamstat = ch_rseqc_bamstat - rseqc_inferexperiment = ch_rseqc_inferexperiment - rseqc_junctionannotation_bed = ch_rseqc_junctionannotation_bed - 
rseqc_junctionannotation_interact_bed = ch_rseqc_junctionannotation_interact_bed - rseqc_junctionannotation_xls = ch_rseqc_junctionannotation_xls - rseqc_junctionannotation_log = ch_rseqc_junctionannotation_log - rseqc_junctionannotation_pdf = ch_rseqc_junctionannotation_pdf - rseqc_junctionannotation_events_pdf = ch_rseqc_junctionannotation_events_pdf - rseqc_junctionannotation_r = ch_rseqc_junctionannotation_r - rseqc_junctionsaturation_pdf = ch_rseqc_junctionsaturation_pdf - rseqc_junctionsaturation_r = ch_rseqc_junctionsaturation_r - rseqc_readduplication_pos_xls = ch_rseqc_readduplication_pos_xls - rseqc_readduplication_seq_xls = ch_rseqc_readduplication_seq_xls - rseqc_readduplication_pdf = ch_rseqc_readduplication_pdf - rseqc_readduplication_r = ch_rseqc_readduplication_r - rseqc_readdistribution = ch_rseqc_readdistribution - rseqc_innerdistance_txt = ch_rseqc_innerdistance_txt - rseqc_innerdistance_distance = ch_rseqc_innerdistance_distance - rseqc_innerdistance_mean = ch_rseqc_innerdistance_mean - rseqc_innerdistance_pdf = ch_rseqc_innerdistance_pdf - rseqc_innerdistance_r = ch_rseqc_innerdistance_r - rseqc_tin = ch_rseqc_tin + dupradar = ch_dupradar_result // DupRadarResult record (5 -> 1) + + // RSeQC outputs (22 individual channels -> 1 record) + rseqc = ch_rseqc_result // RSeQCResult record // Contaminant screening outputs kraken_report = ch_kraken_report @@ -1262,61 +988,14 @@ workflow RNASEQ { bigwig_outputs = ch_bigwig_forward .mix(ch_bigwig_reverse) - // Pseudo-alignment outputs → ${params.pseudo_aligner} - pseudo_outputs = ch_pseudo_quant - .mix(ch_pseudo_tx2gene) - .mix(ch_pseudo_counts_gene) - .mix(ch_pseudo_counts_gene_length_scaled) - .mix(ch_pseudo_counts_gene_scaled) - .mix(ch_pseudo_counts_transcript) - .mix(ch_pseudo_lengths_gene) - .mix(ch_pseudo_lengths_transcript) - .mix(ch_pseudo_tpm_gene) - .mix(ch_pseudo_tpm_transcript) - .mix(ch_pseudo_merged_gene_rds) - .mix(ch_pseudo_merged_transcript_rds) - - // RSEM outputs → star_rsem (logs to 
star_rsem/log) - rsem_logs = ch_rsem_logs - rsem_results = ch_rsem_stat - .mix(ch_rsem_counts_gene) - .mix(ch_rsem_counts_transcript) - .mix(ch_rsem_tpm_gene) - .mix(ch_rsem_tpm_transcript) - .mix(ch_rsem_merged_counts_gene) - .mix(ch_rsem_merged_counts_transcript) - .mix(ch_rsem_merged_genes_long) - .mix(ch_rsem_merged_isoforms_long) - - // STAR-Salmon outputs → star_salmon - star_salmon_outputs = ch_star_salmon_quant - .mix(ch_star_salmon_tx2gene) - .mix(ch_star_salmon_counts_gene) - .mix(ch_star_salmon_counts_gene_length_scaled) - .mix(ch_star_salmon_counts_gene_scaled) - .mix(ch_star_salmon_counts_transcript) - .mix(ch_star_salmon_lengths_gene) - .mix(ch_star_salmon_lengths_transcript) - .mix(ch_star_salmon_tpm_gene) - .mix(ch_star_salmon_tpm_transcript) - .mix(ch_star_salmon_merged_gene_rds) - .mix(ch_star_salmon_merged_transcript_rds) - - // DESeq2 outputs (aligner-based) → ${params.aligner}/deseq2_qc - deseq2_outputs = ch_deseq2_pdf - .mix(ch_deseq2_rdata) - .mix(ch_deseq2_pca_txt) - .mix(ch_deseq2_dists_txt) - .mix(ch_deseq2_log) - .mix(ch_deseq2_size_factors) - - // DESeq2 outputs (pseudo-aligner) → ${params.pseudo_aligner}/deseq2_qc - pseudo_deseq2_outputs = ch_pseudo_deseq2_pdf - .mix(ch_pseudo_deseq2_rdata) - .mix(ch_pseudo_deseq2_pca_txt) - .mix(ch_pseudo_deseq2_dists_txt) - .mix(ch_pseudo_deseq2_log) - .mix(ch_pseudo_deseq2_size_factors) + // Quantification outputs (records replace mix chains) + pseudo = ch_pseudo_result // QuantResult record (12 -> 1) + rsem = ch_rsem_result // RsemResult record (10 -> 1) + star_salmon = ch_star_salmon_result // QuantResult record (12 -> 1) + + // DESeq2 outputs (records replace mix chains) + deseq2 = ch_deseq2_result // DeSeq2Result record (6 -> 1) + pseudo_deseq2 = ch_pseudo_deseq2_result // DeSeq2Result record (6 -> 1) } /*