|
| 1 | +// |
| 2 | +// Prepare reference indexing for downstream steps |
| 3 | +// |
| 4 | + |
| 5 | +include { REFERENCE_INDEXING_SINGLE } from '../../subworkflows/local/reference_indexing_single.nf' |
| 6 | +include { REFERENCE_INDEXING_MULTI } from '../../subworkflows/local/reference_indexing_multi.nf' |
| 7 | +include { GUNZIP as GUNZIP_PMDBED } from '../../modules/nf-core/gunzip/main.nf' |
| 8 | +include { GUNZIP as GUNZIP_PMDFASTA } from '../../modules/nf-core/gunzip/main.nf' |
| 9 | +include { GUNZIP as GUNZIP_SNPBED } from '../../modules/nf-core/gunzip/main.nf' |
| 10 | +include { ELONGATE_REFERENCE } from '../../subworkflows/local/elongate_reference.nf' |
| 11 | + |
| 12 | +workflow REFERENCE_INDEXING { |
| 13 | + take: |
| 14 | + fasta // file: reference FASTA, or a CSV/TSV reference sheet (detected by its .csv/.tsv extension) |
| 15 | + fasta_fai // passed to REFERENCE_INDEXING_SINGLE; ignored (with a warning) when a CSV/TSV reference sheet is given |
| 16 | + fasta_dict // same handling as fasta_fai |
| 17 | + fasta_mapperindexdir // same handling as fasta_fai |
| 18 | + |
| 19 | + main: |
| 20 | + ch_versions = Channel.empty() |
| 21 | + |
| 22 | + // Warn user if they've given a reference sheet that already includes fai/dict/mapper index etc. |
| 23 | + if ( ( fasta.extension == 'csv' || fasta.extension == 'tsv' ) && ( fasta_fai || fasta_dict || fasta_mapperindexdir )) log.warn("A TSV or CSV has been supplied to `--fasta_sheet` as well as e.g. `--fasta_fai`. --fasta_sheet CSV/TSV takes priority and --fasta_* parameters will be ignored.") |
| 24 | + if ( ( fasta.extension == 'csv' || fasta.extension == 'tsv' ) && ( params.mitochondrion_header || params.contamination_estimation_angsd_hapmap || params.damage_manipulation_pmdtools_reference_mask || params.snpcapture_bed || params.genotyping_pileupcaller_bedfile || params.genotyping_pileupcaller_snpfile || params.sexdeterrmine_bedfile || params.mapstats_bedtools_featurefile || params.genotyping_reference_ploidy || params.genotyping_gatk_dbsnp || params.fasta_circular_target || params.circularmapper_elongated_fasta || params.circularmapper_elongated_fai )) log.warn("A TSV or CSV has been supplied to `--fasta_sheet` as well as individual reference-specific input files, e.g. `--contamination_estimation_angsd_hapmap`. Input files specified in the --fasta_sheet CSV/TSV take priority and other input parameters will be ignored.") // NOTE(review): no separate check for a PMD masked-reference FASTA parameter here - confirm whether one is needed |
| 25 | + |
| 26 | + if ( fasta.extension == 'csv' || fasta.extension == 'tsv' ) { |
| 27 | + // If input (multi-)reference sheet supplied |
| 28 | + REFERENCE_INDEXING_MULTI ( fasta ) |
| 29 | + ch_reference_for_mapping = REFERENCE_INDEXING_MULTI.out.reference |
| 30 | + ch_reference_to_elongate = REFERENCE_INDEXING_MULTI.out.elongated_reference |
| 31 | + ch_mitochondrion_header = REFERENCE_INDEXING_MULTI.out.mitochondrion_header |
| 32 | + ch_hapmap = REFERENCE_INDEXING_MULTI.out.hapmap |
| 33 | + ch_pmd_masked_fasta = REFERENCE_INDEXING_MULTI.out.pmd_masked_fasta |
| 34 | + ch_pmd_bed_for_masking = REFERENCE_INDEXING_MULTI.out.pmd_bed_for_masking |
| 35 | + ch_snp_capture_bed = REFERENCE_INDEXING_MULTI.out.snp_capture_bed |
| 36 | + ch_pileupcaller_bed_snp = REFERENCE_INDEXING_MULTI.out.pileupcaller_bed_snp |
| 37 | + ch_sexdeterrmine_bed = REFERENCE_INDEXING_MULTI.out.sexdeterrmine_bed |
| 38 | + ch_bedtools_feature = REFERENCE_INDEXING_MULTI.out.bedtools_feature |
| 39 | + ch_dbsnp = REFERENCE_INDEXING_MULTI.out.dbsnp |
| 40 | + ch_mva = REFERENCE_INDEXING_MULTI.out.mva |
| 41 | + ch_versions = ch_versions.mix( REFERENCE_INDEXING_MULTI.out.versions ) |
| 42 | + } else { |
| 43 | + // If input FASTA and/or indices supplied |
| 44 | + REFERENCE_INDEXING_SINGLE ( fasta, fasta_fai, fasta_dict, fasta_mapperindexdir ) |
| 45 | + ch_reference_to_elongate = REFERENCE_INDEXING_SINGLE.out.elongated_reference |
| 46 | + ch_mitochondrion_header = REFERENCE_INDEXING_SINGLE.out.mitochondrion_header |
| 47 | + ch_hapmap = REFERENCE_INDEXING_SINGLE.out.hapmap |
| 48 | + ch_pmd_masked_fasta = REFERENCE_INDEXING_SINGLE.out.pmd_masked_fasta |
| 49 | + ch_pmd_bed_for_masking = REFERENCE_INDEXING_SINGLE.out.pmd_bed_for_masking |
| 50 | + ch_snp_capture_bed = REFERENCE_INDEXING_SINGLE.out.snp_capture_bed |
| 51 | + ch_pileupcaller_bed_snp = REFERENCE_INDEXING_SINGLE.out.pileupcaller_bed_snp |
| 52 | + ch_sexdeterrmine_bed = REFERENCE_INDEXING_SINGLE.out.sexdeterrmine_bed |
| 53 | + ch_bedtools_feature = REFERENCE_INDEXING_SINGLE.out.bedtools_feature |
| 54 | + ch_reference_for_mapping = REFERENCE_INDEXING_SINGLE.out.reference |
| 55 | + ch_dbsnp = REFERENCE_INDEXING_SINGLE.out.dbsnp |
| 56 | + ch_mva = REFERENCE_INDEXING_SINGLE.out.mva |
| 57 | + ch_versions = ch_versions.mix( REFERENCE_INDEXING_SINGLE.out.versions ) |
| 58 | + } |
| 59 | + |
| 60 | + // Filter out input options that are not provided and unzip if necessary |
| 61 | + ch_mitochondrion_header = ch_mitochondrion_header |
| 62 | + .filter{ it[1] != "" } |
| 63 | + |
| 64 | + ch_hapmap = ch_hapmap |
| 65 | + .filter{ it[1] != "" } |
| 66 | + |
| 67 | + ch_pmd_masked_fasta = ch_pmd_masked_fasta |
| 68 | + .branch { |
| 69 | + meta, pmd_masked_fasta -> |
| 70 | + input: pmd_masked_fasta != "" |
| 71 | + skip: true |
| 72 | + } |
| 73 | + ch_pmd_masked_fasta_gunzip = ch_pmd_masked_fasta.input |
| 74 | + .branch { |
| 75 | + meta, pmd_masked_fasta -> |
| 76 | + forgunzip: pmd_masked_fasta.extension == "gz" |
| 77 | + skip: true |
| 78 | + } |
| 79 | + GUNZIP_PMDFASTA( ch_pmd_masked_fasta_gunzip.forgunzip ) |
| 80 | + ch_pmd_masked_fasta = ch_pmd_masked_fasta_gunzip.skip.mix( GUNZIP_PMDFASTA.out.gunzip ).mix( ch_pmd_masked_fasta.skip ) |
| 81 | + ch_versions = ch_versions.mix( GUNZIP_PMDFASTA.out.versions.first() ) |
| 82 | + |
| 83 | + ch_pmd_bed_for_masking = ch_pmd_bed_for_masking |
| 84 | + .branch { |
| 85 | + meta, pmd_bed_for_masking -> |
| 86 | + input: pmd_bed_for_masking != "" |
| 87 | + skip: true |
| 88 | + } |
| 89 | + ch_pmd_bed_for_masking_gunzip = ch_pmd_bed_for_masking.input |
| 90 | + .branch { |
| 91 | + meta, pmd_bed_for_masking -> |
| 92 | + forgunzip: pmd_bed_for_masking.extension == "gz" |
| 93 | + skip: true |
| 94 | + } |
| 95 | + GUNZIP_PMDBED( ch_pmd_bed_for_masking_gunzip.forgunzip ) |
| 96 | + ch_pmd_bed_for_masking = ch_pmd_bed_for_masking_gunzip.skip.mix( GUNZIP_PMDBED.out.gunzip ).mix( ch_pmd_bed_for_masking.skip ) |
| 97 | + ch_versions = ch_versions.mix( GUNZIP_PMDBED.out.versions.first() ) |
| 98 | + |
| 99 | + ch_pmd_masking = ch_pmd_masked_fasta |
| 100 | + .combine( by: 0, ch_pmd_bed_for_masking ) // pair masked FASTA and masking BED on their first element (meta) |
| 101 | + |
| 102 | + ch_capture_bed = ch_snp_capture_bed //bed file input is optional, so no filtering |
| 103 | + .branch { |
| 104 | + meta, capture_bed -> |
| 105 | + input: capture_bed != "" |
| 106 | + skip: true |
| 107 | + } |
| 108 | + ch_capture_bed_gunzip = ch_capture_bed.input //unzip |
| 109 | + .branch { |
| 110 | + meta, capture_bed -> |
| 111 | + forgunzip: capture_bed.extension == "gz" |
| 112 | + skip: true |
| 113 | + } |
| 114 | + GUNZIP_SNPBED( ch_capture_bed_gunzip.forgunzip ) |
| 115 | + ch_capture_bed = GUNZIP_SNPBED.out.gunzip.mix( ch_capture_bed_gunzip.skip ).mix( ch_capture_bed.skip ) |
| 116 | + ch_versions = ch_versions.mix( GUNZIP_SNPBED.out.versions.first() ) |
| 117 | + |
| 118 | + ch_pileupcaller_bed_snp = ch_pileupcaller_bed_snp |
| 119 | + .filter { it[1] != "" || it[2] != "" } // They go together or not at all. |
| 120 | + // Check if the channel is empty, and throw an error. Will only trigger for tsv fasta input. Single reference gets validated immediately. |
| 121 | + .ifEmpty { if(params.run_genotyping && params.genotyping_tool == 'pileupcaller') { error "[nf-core/eager] ERROR: Genotyping with pileupcaller requires that both '--genotyping_pileupcaller_bedfile' AND '--genotyping_pileupcaller_snpfile' are provided for at least one reference genome." } } |
| 122 | + .filter{ it != null } // Remove null channel which arises if empty cause error returns null. |
| 123 | + |
| 124 | + ch_sexdeterrmine_bed = ch_sexdeterrmine_bed |
| 125 | + .filter { it[1] != "" } |
| 126 | + |
| 127 | + ch_bedtools_feature = ch_bedtools_feature |
| 128 | + .filter { it[1] != "" } |
| 129 | + |
| 130 | + ch_dbsnp = ch_dbsnp |
| 131 | + .filter { it[1] != "" } |
| 132 | + |
| 133 | + // Elongate reference for circularmapper if requested |
| 134 | + if ( params.mapping_tool == "circularmapper" ) { |
| 135 | + // Throw errors if required parameters are missing. `ch_elongated_for_gunzip` is not referenced again in this workflow; the chain is kept for its error check. |
| 136 | + // A circular target is required even when an elongated reference has been provided. |
| 137 | + ch_elongated_for_gunzip = ch_reference_to_elongate |
| 138 | + .filter{ |
| 139 | + meta, circular_target, circularmapper_elongatedfasta, circularmapper_elongatedindex -> |
| 140 | + circular_target != "" |
| 141 | + } |
| 142 | + .ifEmpty{ error "[nf-core/eager] ERROR: Mapping with circularmapper requires a circular target for at least one reference." } |
| 143 | + |
| 144 | + // This ELONGATE_REFERENCE subworkflow also checks if the provided reference is gzipped, and unzips it if necessary. |
| 145 | + ELONGATE_REFERENCE( ch_reference_for_mapping, ch_reference_to_elongate ) |
| 146 | + ch_versions = ch_versions.mix( ELONGATE_REFERENCE.out.versions ) |
| 147 | + ch_elongated_indexed_reference = ELONGATE_REFERENCE.out.circular_reference |
| 148 | + ch_elongated_chr_list = ELONGATE_REFERENCE.out.elongated_chr_list |
| 149 | + |
| 150 | + } else { |
| 151 | + ch_elongated_indexed_reference = ch_reference_to_elongate |
| 152 | + ch_elongated_chr_list = Channel.empty() |
| 153 | + } |
| 154 | + |
| 155 | + emit: |
| 156 | + reference = ch_reference_for_mapping // [ meta, fasta, fai, dict, mapindex ] |
| 157 | + elongated_reference = ch_elongated_indexed_reference // [ meta, circularmapper_elongated_fasta, circularmapper_elongated_index ] |
| 158 | + elongated_chr_list = ch_elongated_chr_list // [ meta, elongated_chr_list ] |
| 159 | + mitochondrion_header = ch_mitochondrion_header // [ meta, mitochondrion_header ] |
| 160 | + hapmap = ch_hapmap // [ meta, hapmap ] |
| 161 | + pmd_masking = ch_pmd_masking // [ meta, pmd_masked_fasta, pmd_bed_for_masking ] |
| 162 | + pmd_bed_for_masking = ch_pmd_bed_for_masking // [ meta, pmd_bed_for_masking ] |
| 163 | + snp_capture_bed = ch_capture_bed // [ meta, capture_bed ] |
| 164 | + pileupcaller_bed_snp = ch_pileupcaller_bed_snp // [ meta, pileupcaller_bed, pileupcaller_snp ] |
| 165 | + sexdeterrmine_bed = ch_sexdeterrmine_bed // [ meta, sexdet_bed ] |
| 166 | + bedtools_feature = ch_bedtools_feature // [ meta, bedtools_feature ] |
| 167 | + dbsnp = ch_dbsnp // [ meta, dbsnp ] |
| 168 | + mva = ch_mva // [ meta, consensus_multivcfanalyzer_additional_vcf_files, consensus_multivcfanalyzer_reference_gff_annotations, consensus_multivcfanalyzer_reference_gff_exclude, consensus_multivcfanalyzer_reference_snpeff_results ] |
| 169 | + versions = ch_versions |
| 170 | + |
| 171 | +} |
0 commit comments