Skip to content

Commit 71f4510

Browse files
committed
working RG removal branching for mpileupcaller
1 parent 6eb67c4 commit 71f4510

3 files changed

Lines changed: 113 additions & 67 deletions

File tree

subworkflows/local/genotype.nf

Lines changed: 108 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// Genotype the input data using the requested genotyper.
33
//
44

5+
include { PICARD_ADDORREPLACEREADGROUPS } from '../../modules/nf-core/picard/addorreplacereadgroups/main'
56
include { SAMTOOLS_MPILEUP as SAMTOOLS_MPILEUP_PILEUPCALLER } from '../../modules/nf-core/samtools/mpileup/main'
67
include { EIGENSTRATDATABASETOOLS_EIGENSTRATSNPCOVERAGE } from '../../modules/nf-core/eigenstratdatabasetools/eigenstratsnpcoverage/main'
78
include { SEQUENCETOOLS_PILEUPCALLER } from '../../modules/nf-core/sequencetools/pileupcaller/main'
@@ -35,6 +36,13 @@ workflow GENOTYPE {
3536

3637
if ( params.genotyping_tool == 'pileupcaller' ) {
3738

39+
// TODO: if running per-library, necessary to update readgroups
40+
// reassign readgroups to be unique per library (eg rewrite any SM field from sample to SAMPLEID_LIBID)
41+
// some tools (eg pileupcaller) break when multiple .bams input have the same SM field for read groups
42+
// PICARD_ADDORREPLACEREADGROUPS(ch_bams_for_genotyping)
43+
// ch_bams_for_genotyping = PICARD_ADDORREPLACEREADGROUPS.out
44+
45+
3846
// Compile together all reference based files
3947
ch_refs_prep = ch_fasta_plus
4048
// Because aux files are optional, the channel can be [[],[],[]]. remainder:true will output both the empty list and the fasta_plus channel with an added 'null'.
@@ -61,85 +69,118 @@ workflow GENOTYPE {
6169
} // RESULT: [ [combination_meta], [ref_meta], fasta, fai, dict, bed, snp ]
6270

6371
// Prepare collect bams for mpileup
64-
ch_mpileup_inputs_bams = ch_bam_bai
72+
// Recreate Read Group headers if no genotyping input is library-by-library
73+
if ( params.genotyping_use_unmerged_libraries ) {
74+
ch_input_for_rewriting_readgroups = ch_bam_bai
6575
.map {
66-
addNewMetaFromAttributes( it, ["reference", "strandedness"] , ["reference", "strandedness"] , false )
76+
addNewMetaFromAttributes( it, "reference", "reference" , false )
77+
}
78+
.combine( ch_refs_for_mpileup_pileupcaller , by:0 )
79+
.multiMap {
80+
ignore_me, combo_meta, bams, bais, ref_meta, fasta, fai, dict, bed, snp ->
81+
bams: [ combo_meta, bams ]
82+
fasta: [ ref_meta, fasta ]
83+
fai: [ ref_meta, fai ]
6784
}
68-
.groupTuple()
69-
.map {
70-
combo_meta, metas, bams, bais ->
71-
def ids = metas.collect { meta -> meta.sample_id }
72-
[ combo_meta + [sample_id: ids], bams ] // Drop bais
73-
} // Collect all IDs into a list in meta.sample_id. Useful when running pileupCaller later
7485

75-
// Combine prepped bams and references
76-
ch_mpileup_inputs = ch_mpileup_inputs_bams
77-
.map {
78-
addNewMetaFromAttributes( it, "reference", "reference" , false )
79-
}
80-
.combine( ch_refs_for_mpileup_pileupcaller , by:0 )
81-
// do not run if no bed file is provided
82-
.filter { it[7] != []}
83-
.multiMap {
84-
ignore_me, combo_meta, bams, ref_meta, fasta, fai, dict, bed, snp ->
85-
def bedfile = bed != "" ? bed : []
86-
bams: [ combo_meta, bams, bedfile ]
87-
fasta: [ fasta ]
88-
}
86+
PICARD_ADDORREPLACEREADGROUPS(ch_input_for_rewriting_readgroups.bams, ch_input_for_rewriting_readgroups.fasta, ch_input_for_rewriting_readgroups.fai)
8987

90-
SAMTOOLS_MPILEUP_PILEUPCALLER(
91-
ch_mpileup_inputs.bams,
92-
ch_mpileup_inputs.fasta,
93-
)
94-
ch_versions = ch_versions.mix( SAMTOOLS_MPILEUP_PILEUPCALLER.out.versions.first() )
95-
96-
ch_pileupcaller_input = SAMTOOLS_MPILEUP_PILEUPCALLER.out.mpileup
88+
ch_mpileup_inputs_bams = PICARD_ADDORREPLACEREADGROUPS.out.bam
9789
.map {
98-
addNewMetaFromAttributes( it, "reference", "reference" , false )
99-
}
100-
.combine( ch_refs_for_mpileup_pileupcaller, by:0 )
101-
.multiMap {
102-
ignore_me, meta, mpileup, ref_meta, fasta, fai, dict, bed, snp ->
103-
// def snpfile = snp != "" ? snp : []
104-
mpileup: [ meta, mpileup ]
105-
snpfile: snp
90+
addNewMetaFromAttributes( it, ["reference", "strandedness"] , ["reference", "strandedness"] , false )
10691
}
107-
108-
// Run PileupCaller
109-
SEQUENCETOOLS_PILEUPCALLER(
110-
ch_pileupcaller_input.mpileup,
111-
ch_pileupcaller_input.snpfile,
112-
[]
113-
)
114-
ch_versions = ch_versions.mix( SEQUENCETOOLS_PILEUPCALLER.out.versions.first() )
115-
116-
// Merge/rename genotyping datasets
117-
ch_final_genotypes = SEQUENCETOOLS_PILEUPCALLER.out.eigenstrat
92+
.groupTuple()
11893
.map {
119-
addNewMetaFromAttributes( it, "reference" , "reference" , false )
94+
combo_meta, metas, bams ->
95+
def ids = metas.collect { meta -> meta.library_id }
96+
[ combo_meta + [sample_id: ids], bams ]
97+
} // Collect all LIBRARY IDs into a list in meta.sample_id. Useful when running pileupCaller later
98+
// distinct from running merged libraries as single sample -- libraries must be unique
99+
}
100+
else {
101+
ch_mpileup_inputs_bams = ch_bam_bai
102+
.map {
103+
addNewMetaFromAttributes( it, ["reference", "strandedness"] , ["reference", "strandedness"] , false )
120104
}
121105
.groupTuple()
122106
.map {
123-
combo_meta, metas, geno, snp, ind ->
124-
[ combo_meta, geno, snp, ind ]
125-
}
107+
combo_meta, metas, bams, bais ->
108+
def ids = metas.collect { meta -> meta.sample_id }
109+
[ combo_meta + [sample_id: ids], bams ] // Drop bais
110+
} // Collect all IDs into a list in meta.sample_id. Useful when running pileupCaller later
111+
}
112+
113+
ch_mpileup_inputs_bams.view()
126114

127-
COLLECT_GENOTYPES( ch_final_genotypes )
128-
// Add genotyper info to the meta
129-
ch_pileupcaller_genotypes = COLLECT_GENOTYPES.out.collected
115+
// Combine prepped bams and references
116+
ch_mpileup_inputs = ch_mpileup_inputs_bams
130117
.map {
131-
meta, geno, snp, ind ->
132-
[ meta + [ genotyper: "pileupcaller" ], geno , snp, ind ]
118+
addNewMetaFromAttributes( it, "reference", "reference" , false )
119+
}
120+
.combine( ch_refs_for_mpileup_pileupcaller , by:0 )
121+
// do not run if no bed file is provided
122+
.filter { it[7] != []}
123+
.multiMap {
124+
ignore_me, combo_meta, bams, ref_meta, fasta, fai, dict, bed, snp ->
125+
def bedfile = bed != "" ? bed : []
126+
bams: [ combo_meta, bams, bedfile ]
127+
fasta: [ fasta ]
133128
}
134-
ch_versions = ch_versions.mix( COLLECT_GENOTYPES.out.versions.first() )
135129

136-
// Calculate coverage stats for collected eigenstrat dataset
137-
EIGENSTRATDATABASETOOLS_EIGENSTRATSNPCOVERAGE(
138-
ch_pileupcaller_genotypes
139-
)
140-
ch_eigenstrat_coverage_stats = EIGENSTRATDATABASETOOLS_EIGENSTRATSNPCOVERAGE.out.tsv
141-
ch_versions = ch_versions.mix( EIGENSTRATDATABASETOOLS_EIGENSTRATSNPCOVERAGE.out.versions.first() )
142-
ch_multiqc_files = ch_multiqc_files.mix( EIGENSTRATDATABASETOOLS_EIGENSTRATSNPCOVERAGE.out.json )
130+
131+
SAMTOOLS_MPILEUP_PILEUPCALLER(
132+
ch_mpileup_inputs.bams,
133+
ch_mpileup_inputs.fasta,
134+
)
135+
ch_versions = ch_versions.mix( SAMTOOLS_MPILEUP_PILEUPCALLER.out.versions.first() )
136+
137+
ch_pileupcaller_input = SAMTOOLS_MPILEUP_PILEUPCALLER.out.mpileup
138+
.map {
139+
addNewMetaFromAttributes( it, "reference", "reference" , false )
140+
}
141+
.combine( ch_refs_for_mpileup_pileupcaller, by:0 )
142+
.multiMap {
143+
ignore_me, meta, mpileup, ref_meta, fasta, fai, dict, bed, snp ->
144+
// def snpfile = snp != "" ? snp : []
145+
mpileup: [ meta, mpileup ]
146+
snpfile: snp
147+
}
148+
149+
// Run PileupCaller
150+
SEQUENCETOOLS_PILEUPCALLER(
151+
ch_pileupcaller_input.mpileup,
152+
ch_pileupcaller_input.snpfile,
153+
[]
154+
)
155+
ch_versions = ch_versions.mix( SEQUENCETOOLS_PILEUPCALLER.out.versions.first() )
156+
157+
// Merge/rename genotyping datasets
158+
ch_final_genotypes = SEQUENCETOOLS_PILEUPCALLER.out.eigenstrat
159+
.map {
160+
addNewMetaFromAttributes( it, "reference" , "reference" , false )
161+
}
162+
.groupTuple()
163+
.map {
164+
combo_meta, metas, geno, snp, ind ->
165+
[ combo_meta, geno, snp, ind ]
166+
}
167+
168+
COLLECT_GENOTYPES( ch_final_genotypes )
169+
// Add genotyper info to the meta
170+
ch_pileupcaller_genotypes = COLLECT_GENOTYPES.out.collected
171+
.map {
172+
meta, geno, snp, ind ->
173+
[ meta + [ genotyper: "pileupcaller" ], geno , snp, ind ]
174+
}
175+
ch_versions = ch_versions.mix( COLLECT_GENOTYPES.out.versions.first() )
176+
177+
// Calculate coverage stats for collected eigenstrat dataset
178+
EIGENSTRATDATABASETOOLS_EIGENSTRATSNPCOVERAGE(
179+
ch_pileupcaller_genotypes
180+
)
181+
ch_eigenstrat_coverage_stats = EIGENSTRATDATABASETOOLS_EIGENSTRATSNPCOVERAGE.out.tsv
182+
ch_versions = ch_versions.mix( EIGENSTRATDATABASETOOLS_EIGENSTRATSNPCOVERAGE.out.versions.first() )
183+
ch_multiqc_files = ch_multiqc_files.mix( EIGENSTRATDATABASETOOLS_EIGENSTRATSNPCOVERAGE.out.json )
143184
}
144185

145186
if ( params.genotyping_tool == 'ug' ) {
@@ -401,6 +442,8 @@ workflow GENOTYPE {
401442
fasta: [ ref_meta, fasta ]
402443
}
403444

445+
ch_input_for_angsd.bam.view()
446+
404447
ANGSD_GL(
405448
ch_input_for_angsd.bam,
406449
ch_input_for_angsd.fasta,

subworkflows/local/utils_nfcore_eager_pipeline/main.nf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,7 @@ def validateInputParameters() {
243243
if ( params.genotyping_source == 'pmd' && ! params.run_pmd_filtering ) { exit 1, ("[nf-core/eager] ERROR: --genotyping_source cannot be 'pmd' unless PMD-filtering is ran.") }
244244
if ( params.genotyping_source == 'rescaled' && ! params.run_mapdamage_rescaling ) { exit 1, ("[nf-core/eager] ERROR: --genotyping_source cannot be 'rescaled' unless aDNA damage rescaling is ran.") }
245245
if ( ( params.genotyping_source == 'rescaled' || params.genotyping_source == 'pmd_timmed' || params.genotyping_source == 'pmd' ) && ! params.genotyping_use_unmerged_libraries ) { log.warn("[nf-core/eager] WARNING: Combining multiple libraries with rescaled damage for genotyping may be inappropriate!") }
246-
if ( params.fasta && params.run_genotyping && params.genotyping_tool == 'pileupcaller' && ! (params.genotyping_pileupcaller_bedfile || params.genotyping_pileupcaller_snpfile ) ) { exit 1, ("[nf-core/eager] ERROR: Genotyping with pileupcaller requires both '--genotyping_pileupcaller_bedfile' AND '--genotyping_pileupcaller_snpfile' to be provided.") }
246+
// if ( params.fasta && params.run_genotyping && params.genotyping_tool == 'pileupcaller' && ! (params.genotyping_pileupcaller_bedfile || params.genotyping_pileupcaller_snpfile ) ) { exit 1, ("[nf-core/eager] ERROR: Genotyping with pileupcaller requires both '--genotyping_pileupcaller_bedfile' AND '--genotyping_pileupcaller_snpfile' to be provided.") }
247247
if ( params.fasta && params.mapping_tool == "circularmapper" && !params.fasta_circular_target ) { exit 1, ("[nf-core/eager] ERROR: Mapping with circularmapper requires --fasta_circular_target to be provided.") }
248248

249249
// metagenomics

workflows/eager.nf

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,9 @@ workflow EAGER {
540540
if ( params.genotyping_use_unmerged_libraries ) {
541541
// Get UNMERGED data from either initial mapping (post deduplication/filtering, if done), or post-damage manipulation after deduplication/filtering (if done) -- Note: the .out channels include libraries that are not damage manipulated (eg UDG-Full))
542542
ch_bams_for_genotyping = params.genotyping_source == 'rescaled' ? MANIPULATE_DAMAGE.out.rescaled : params.genotyping_source == 'pmd' ? MANIPULATE_DAMAGE.out.filtered : params.genotyping_source == 'trimmed' ? MANIPULATE_DAMAGE.out.trimmed : params.genotyping_source == 'pmd_trimmed' ? MANIPULATE_DAMAGE.out.trimmed : ch_dedupped_bams
543+
544+
// ch_bams_for_genotyping.view()
545+
543546
}
544547
else {
545548
// Genotyping done on MERGED data, regardless of UDG-treatment vs not; damage manipulation per-library!
@@ -553,7 +556,7 @@ workflow EAGER {
553556
[meta, fasta, fai, dict]
554557
}
555558

556-
ch_reference_for_genotyping.view()
559+
// ch_reference_for_genotyping.view()
557560

558561
GENOTYPE(
559562
ch_bams_for_genotyping,

0 commit comments

Comments
 (0)