Skip to content

Commit 3df49f1

Browse files
committed
Merge branch 'mva' of github.com:aidaanva/eager into mva
1 parent d7bb376 commit 3df49f1

3 files changed

Lines changed: 513 additions & 0 deletions

File tree

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
//
2+
// Prepare reference indices and reference-specific input files for downstream use
3+
//
4+
5+
include { REFERENCE_INDEXING_SINGLE } from '../../subworkflows/local/reference_indexing_single.nf'
6+
include { REFERENCE_INDEXING_MULTI } from '../../subworkflows/local/reference_indexing_multi.nf'
7+
include { GUNZIP as GUNZIP_PMDBED } from '../../modules/nf-core/gunzip/main.nf'
8+
include { GUNZIP as GUNZIP_PMDFASTA } from '../../modules/nf-core/gunzip/main.nf'
9+
include { GUNZIP as GUNZIP_SNPBED } from '../../modules/nf-core/gunzip/main.nf'
10+
include { ELONGATE_REFERENCE } from '../../subworkflows/local/elongate_reference.nf'
11+
12+
//
// REFERENCE_INDEXING
//
// Dispatches to REFERENCE_INDEXING_MULTI when `fasta` is a CSV/TSV reference
// sheet, otherwise to REFERENCE_INDEXING_SINGLE. It then drops reference-specific
// inputs that were not provided (empty string placeholders), gunzips the PMD
// masked FASTA / PMD BED / SNP capture BED where they end in `.gz`, and
// optionally elongates the reference when mapping with circularmapper.
//
workflow REFERENCE_INDEXING {
    take:
    fasta                // file: reference FASTA, or a CSV/TSV reference sheet (detected by file extension)
    fasta_fai            // only used when a single FASTA is supplied
    fasta_dict           // only used when a single FASTA is supplied
    fasta_mapperindexdir // only used when a single FASTA is supplied

    main:
    ch_versions = Channel.empty()

    // A CSV/TSV extension means a (multi-)reference sheet was supplied instead of a FASTA.
    is_reference_sheet = fasta.extension == 'csv' || fasta.extension == 'tsv'

    // Warn user if they've given a reference sheet that already includes fai/dict/mapper index etc.
    if ( is_reference_sheet && ( fasta_fai || fasta_dict || fasta_mapperindexdir ) ) {
        log.warn("A TSV or CSV has been supplied to `--fasta_sheet` as well as e.g. `--fasta_fai`. --fasta_sheet CSV/TSV takes priority and --fasta_* parameters will be ignored.")
    }

    // Warn user if reference-specific parameters were given alongside a reference sheet.
    // NOTE(review): the original condition listed `damage_manipulation_pmdtools_reference_mask`
    // twice; the duplicate has been dropped (no behaviour change). A separate masked-reference
    // parameter was presumably intended for one of them - confirm against the parameter schema.
    if ( is_reference_sheet && (
            params.mitochondrion_header ||
            params.contamination_estimation_angsd_hapmap ||
            params.damage_manipulation_pmdtools_reference_mask ||
            params.snpcapture_bed ||
            params.genotyping_pileupcaller_bedfile ||
            params.genotyping_pileupcaller_snpfile ||
            params.sexdeterrmine_bedfile ||
            params.mapstats_bedtools_featurefile ||
            params.genotyping_reference_ploidy ||
            params.genotyping_gatk_dbsnp ||
            params.fasta_circular_target ||
            params.circularmapper_elongated_fasta ||
            params.circularmapper_elongated_fai ) ) {
        log.warn("A TSV or CSV has been supplied to `--fasta_sheet` as well as individual reference-specific input files, e.g. `--contamination_estimation_angsd_hapmap`. Input files specified in the --fasta_sheet CSV/TSV take priority and other input parameters will be ignored.")
    }

    if ( is_reference_sheet ) {
        // If input (multi-)reference sheet supplied
        REFERENCE_INDEXING_MULTI ( fasta )
        ch_reference_for_mapping = REFERENCE_INDEXING_MULTI.out.reference
        ch_reference_to_elongate = REFERENCE_INDEXING_MULTI.out.elongated_reference
        ch_mitochondrion_header  = REFERENCE_INDEXING_MULTI.out.mitochondrion_header
        ch_hapmap                = REFERENCE_INDEXING_MULTI.out.hapmap
        ch_pmd_masked_fasta      = REFERENCE_INDEXING_MULTI.out.pmd_masked_fasta
        ch_pmd_bed_for_masking   = REFERENCE_INDEXING_MULTI.out.pmd_bed_for_masking
        ch_snp_capture_bed       = REFERENCE_INDEXING_MULTI.out.snp_capture_bed
        ch_pileupcaller_bed_snp  = REFERENCE_INDEXING_MULTI.out.pileupcaller_bed_snp
        ch_sexdeterrmine_bed     = REFERENCE_INDEXING_MULTI.out.sexdeterrmine_bed
        ch_bedtools_feature      = REFERENCE_INDEXING_MULTI.out.bedtools_feature
        ch_dbsnp                 = REFERENCE_INDEXING_MULTI.out.dbsnp
        ch_mva                   = REFERENCE_INDEXING_MULTI.out.mva
        ch_versions              = ch_versions.mix( REFERENCE_INDEXING_MULTI.out.versions )
    } else {
        // If input FASTA and/or indices supplied
        REFERENCE_INDEXING_SINGLE ( fasta, fasta_fai, fasta_dict, fasta_mapperindexdir )
        ch_reference_to_elongate = REFERENCE_INDEXING_SINGLE.out.elongated_reference
        ch_mitochondrion_header  = REFERENCE_INDEXING_SINGLE.out.mitochondrion_header
        ch_hapmap                = REFERENCE_INDEXING_SINGLE.out.hapmap
        ch_pmd_masked_fasta      = REFERENCE_INDEXING_SINGLE.out.pmd_masked_fasta
        ch_pmd_bed_for_masking   = REFERENCE_INDEXING_SINGLE.out.pmd_bed_for_masking
        ch_snp_capture_bed       = REFERENCE_INDEXING_SINGLE.out.snp_capture_bed
        ch_pileupcaller_bed_snp  = REFERENCE_INDEXING_SINGLE.out.pileupcaller_bed_snp
        ch_sexdeterrmine_bed     = REFERENCE_INDEXING_SINGLE.out.sexdeterrmine_bed
        ch_bedtools_feature      = REFERENCE_INDEXING_SINGLE.out.bedtools_feature
        ch_reference_for_mapping = REFERENCE_INDEXING_SINGLE.out.reference
        ch_dbsnp                 = REFERENCE_INDEXING_SINGLE.out.dbsnp
        ch_mva                   = REFERENCE_INDEXING_SINGLE.out.mva
        ch_versions              = ch_versions.mix( REFERENCE_INDEXING_SINGLE.out.versions )
    }

    // Filter out input options that are not provided and unzip if necessary
    ch_mitochondrion_header = ch_mitochondrion_header
        .filter{ it[1] != "" }

    ch_hapmap = ch_hapmap
        .filter{ it[1] != "" }

    // PMD masked FASTA: split provided vs. not provided, then gzipped vs. plain.
    ch_pmd_masked_fasta = ch_pmd_masked_fasta
        .branch {
            meta, pmd_masked_fasta ->
                input: pmd_masked_fasta != ""
                skip: true
        }
    ch_pmd_masked_fasta_gunzip = ch_pmd_masked_fasta.input
        .branch {
            meta, pmd_masked_fasta ->
                forgunzip: pmd_masked_fasta.extension == "gz"
                skip: true
        }
    GUNZIP_PMDFASTA( ch_pmd_masked_fasta_gunzip.forgunzip )
    // Entries without a masked FASTA (empty placeholder) are mixed back in unchanged.
    ch_pmd_masked_fasta = ch_pmd_masked_fasta_gunzip.skip.mix( GUNZIP_PMDFASTA.out.gunzip ).mix( ch_pmd_masked_fasta.skip )
    // Fixed: was assigned to `ch_version`, so these versions never reached the `versions` emit.
    ch_versions = ch_versions.mix( GUNZIP_PMDFASTA.out.versions.first() )

    // PMD BED for masking: same provided/gzipped handling as the masked FASTA above.
    ch_pmd_bed_for_masking = ch_pmd_bed_for_masking
        .branch {
            meta, pmd_bed_for_masking ->
                input: pmd_bed_for_masking != ""
                skip: true
        }
    ch_pmd_bed_for_masking_gunzip = ch_pmd_bed_for_masking.input
        .branch {
            meta, pmd_bed_for_masking ->
                forgunzip: pmd_bed_for_masking.extension == "gz"
                skip: true
        }
    GUNZIP_PMDBED( ch_pmd_bed_for_masking_gunzip.forgunzip )
    ch_pmd_bed_for_masking = ch_pmd_bed_for_masking_gunzip.skip.mix( GUNZIP_PMDBED.out.gunzip ).mix( ch_pmd_bed_for_masking.skip )
    // Fixed: was assigned to `ch_version`.
    ch_versions = ch_versions.mix( GUNZIP_PMDBED.out.versions.first() )

    // Pair masked FASTA and masking BED per reference, keyed on the first tuple element (meta).
    ch_pmd_masking = ch_pmd_masked_fasta
        .combine( by: 0, ch_pmd_bed_for_masking )

    ch_capture_bed = ch_snp_capture_bed //bed file input is optional, so no filtering
        .branch {
            meta, capture_bed ->
                input: capture_bed != ""
                skip: true
        }
    ch_capture_bed_gunzip = ch_capture_bed.input //unzip
        .branch {
            meta, capture_bed ->
                forgunzip: capture_bed.extension == "gz"
                skip: true
        }
    GUNZIP_SNPBED( ch_capture_bed_gunzip.forgunzip )
    ch_capture_bed = GUNZIP_SNPBED.out.gunzip.mix( ch_capture_bed_gunzip.skip ).mix( ch_capture_bed.skip )
    // Fixed: was assigned to `ch_version`.
    ch_versions = ch_versions.mix( GUNZIP_SNPBED.out.versions.first() )

    ch_pileupcaller_bed_snp = ch_pileupcaller_bed_snp
        .filter { it[1] != "" || it[2] != "" } // They go together or not at all.
        // Check if the channel is empty, and throw an error. Will only trigger for tsv fasta input. Single reference gets validated immediately.
        .ifEmpty { if(params.run_genotyping && params.genotyping_tool == 'pileupcaller') { error "[nf-core/eager] ERROR: Genotyping with pileupcaller requires that both '--genotyping_pileupcaller_bedfile' AND '--genotyping_pileupcaller_snpfile' are provided for at least one reference genome." } }
        .filter{ it != null } // Remove null channel which arises if empty cause error returns null.

    ch_sexdeterrmine_bed = ch_sexdeterrmine_bed
        .filter { it[1] != "" }

    ch_bedtools_feature = ch_bedtools_feature
        .filter { it[1] != "" }

    ch_dbsnp = ch_dbsnp
        .filter { it[1] != "" }

    // Elongate reference for circularmapper if requested
    if ( params.mapping_tool == "circularmapper" ) {
        // Throw errors if required parameters are missing
        // A circular target is required even when an elongated reference has been provided.
        // This channel is not consumed further in this workflow; the chain is kept for its ifEmpty error check.
        ch_elongated_for_gunzip = ch_reference_to_elongate
            .filter{
                meta, circular_target, circularmapper_elongatedfasta, circularmapper_elongatedindex ->
                    circular_target != ""
            }
            .ifEmpty{ error "[nf-core/eager] ERROR: Mapping with circularmapper requires either a circular target for at least one reference." }

        // This ELONGATE_REFERENCE subworkflow also checks if the provided reference is gzipped, and unzips it if necessary.
        ELONGATE_REFERENCE( ch_reference_for_mapping, ch_reference_to_elongate )
        // Fixed: was assigned to `ch_version`.
        ch_versions                    = ch_versions.mix( ELONGATE_REFERENCE.out.versions )
        ch_elongated_indexed_reference = ELONGATE_REFERENCE.out.circular_reference
        ch_elongated_chr_list          = ELONGATE_REFERENCE.out.elongated_chr_list

    } else {
        // NOTE(review): in this branch the un-elongated input channel is passed through as-is; the filter
        // closure above destructures it as four elements (meta, circular_target, elongated fasta, elongated
        // index), which differs from the three-element shape documented on the emit below - confirm.
        ch_elongated_indexed_reference = ch_reference_to_elongate
        ch_elongated_chr_list          = Channel.empty()
    }

    emit:
    reference            = ch_reference_for_mapping       // [ meta, fasta, fai, dict, mapindex ]
    elongated_reference  = ch_elongated_indexed_reference // [ meta, circularmapper_elongated_fasta, circularmapper_elongated_index ]
    elongated_chr_list   = ch_elongated_chr_list          // [ meta, elongated_chr_list ]
    mitochondrion_header = ch_mitochondrion_header        // [ meta, mitochondrion_header ]
    hapmap               = ch_hapmap                      // [ meta, hapmap ]
    pmd_masking          = ch_pmd_masking                 // [ meta, pmd_masked_fasta, pmd_bed_for_masking ]
    pmd_bed_for_masking  = ch_pmd_bed_for_masking         // [ meta, pmd_bed_for_masking ]
    snp_capture_bed      = ch_capture_bed                 // [ meta, capture_bed ]
    pileupcaller_bed_snp = ch_pileupcaller_bed_snp        // [ meta, pileupcaller_bed, pileupcaller_snp ]
    sexdeterrmine_bed    = ch_sexdeterrmine_bed           // [ meta, sexdet_bed ]
    bedtools_feature     = ch_bedtools_feature            // [ meta, bedtools_feature ]
    dbsnp                = ch_dbsnp                       // [ meta, dbsnp ]
    mva                  = ch_mva                         // [ meta, consensus_multivcfanalyzer_additional_vcf_files, consensus_multivcfanalyzer_reference_gff_annotations, consensus_multivcfanalyzer_reference_gff_exclude, consensus_multivcfanalyzer_reference_snpeff_results ]
    versions             = ch_versions                    // collected tool versions, including gunzip and reference elongation

}

0 commit comments

Comments
 (0)