Skip to content

Commit f3ca5fc

Browse files
authored
Merge pull request #526 from TCLamnidis/dev
Nuclear contamination per library in MQC, plus fixes and cleanup of channels by james
2 parents f06df88 + 549450d commit f3ca5fc

3 files changed

Lines changed: 25 additions & 19 deletions

File tree

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
4343
* [#473](https://github.com/nf-core/eager/issues/473) - Fixed bug in sexdet_process on AWS
4444
* [#444](https://github.com/nf-core/eager/issues/444) - Provide option for preserving realigned bam + index
4545
* Increase MultiQC process memory requirements to ensure enough memory for large runs
46+
* Fixed deduplication output logic. Will now pass along only the post-rmdup bams if duplicate removal is not skipped, instead of both the post-rmdup and pre-rmdup bams.
4647
* [#497](https://github.com/nf-core/eager/issues/497) - Simplifies number of parameters required to run bam filtering
4748
* [#501](https://github.com/nf-core/eager/issues/501) - Adds additional validation checks for MALT/MaltExtract database input files
4849
* [#508](https://github.com/nf-core/eager/issues/508) - Made Markduplicates default dedupper due to narrower context specificity of dedup
4950
* [#516](https://github.com/nf-core/eager/issues/516) - Made bedtools not report out of memory exit code when warning of inconsistent FASTA/Bed entry names
51+
* Nuclear contamination is now reported with the correct library names.
5052

5153
### `Dependencies`
5254

bin/print_x_contamination.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,8 @@ def make_float(x):
3737
Ind=re.sub('\.X.contamination.out$', '', fn).split("/")[-1]
3838
for line in f:
3939
fields=line.strip().split()
40-
if line.strip()[0:19] == "We have nSNP sites:":
41-
nSNPs=fields[4][:-1]
40+
if line.strip()[0:21] == "[readicnts] Has read:":
41+
nSNPs=fields[4]
4242
elif line.strip()[0:7] == "Method1" and line.strip()[9:16] == 'new_llh':
4343
mom1=fields[3].split(":")[1]
4444
err_mom1=fields[4].split(":")[1]

main.nf

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1438,7 +1438,7 @@ process circularmapper{
14381438
file fasta from ch_fasta_for_circularmapper.collect()
14391439

14401440
output:
1441-
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.mapped.bam"), file("*.{bai,csi}") into ch_output_from_cm, ch_outputindex_from_cm
1441+
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.mapped.bam"), file("*.{bai,csi}") into ch_output_from_cm
14421442

14431443
when:
14441444
params.mapper == 'circularmapper'
@@ -1717,7 +1717,7 @@ process samtools_filter {
17171717
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(bam), file(bai) from ch_seqtypemerged_for_samtools_filter
17181718

17191719
output:
1720-
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*filtered.bam"), file("*.{bai,csi}") into ch_output_from_filtering,ch_outputindex_from_filtering
1720+
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*filtered.bam"), file("*.{bai,csi}") into ch_output_from_filtering
17211721
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.unmapped.fastq.gz") optional true into ch_bam_filtering_for_metagenomic
17221722
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.unmapped.bam") optional true
17231723

@@ -1905,9 +1905,13 @@ process markduplicates{
19051905

19061906
// This is for post-deduplication per-library evaluation steps _without_ any
19071907
// form of library merging.
1908-
ch_skiprmdup_for_libeval.mix(ch_dedup_for_libeval, ch_markdup_for_libeval)
1909-
.into{ ch_rmdup_for_preseq; ch_rmdup_for_damageprofiler; ch_for_nuclear_contamination; ch_rmdup_formtnucratio }
1910-
1908+
if ( params.skip_deduplication ) {
1909+
ch_skiprmdup_for_libeval.mix(ch_dedup_for_libeval, ch_markdup_for_libeval)
1910+
.into{ ch_rmdup_for_preseq; ch_rmdup_for_damageprofiler; ch_for_nuclear_contamination; ch_rmdup_formtnucratio }
1911+
} else {
1912+
ch_dedup_for_libeval.mix(ch_markdup_for_libeval)
1913+
.into{ ch_rmdup_for_preseq; ch_rmdup_for_damageprofiler; ch_for_nuclear_contamination; ch_rmdup_formtnucratio }
1914+
}
19111915

19121916
// Merge independently sequenced libraries that share the same treatment (often done to improve complexity). Libraries with different strandedness/UDG treatment are not merged because bamtrim/pmdtools need the UDG info
19131917

@@ -2547,16 +2551,16 @@ if (params.additional_vcf_files == '') {
25472551
file fasta from ch_fasta_for_multivcfanalyzer.collect()
25482552

25492553
output:
2550-
file('fullAlignment.fasta.gz') into ch_output_multivcfanalyzer_fullalignment
2551-
file('info.txt.gz') into ch_output_multivcfanalyzer_info
2552-
file('snpAlignment.fasta.gz') into ch_output_multivcfanalyzer_snpalignment
2553-
file('snpAlignmentIncludingRefGenome.fasta.gz') into ch_output_multivcfanalyzer_snpalignmentref
2554-
file('snpStatistics.tsv.gz') into ch_output_multivcfanalyzer_snpstatistics
2555-
file('snpTable.tsv.gz') into ch_output_multivcfanalyzer_snptable
2556-
file('snpTableForSnpEff.tsv.gz') into ch_output_multivcfanalyzer_snptablesnpeff
2557-
file('snpTableWithUncertaintyCalls.tsv.gz') into ch_output_multivcfanalyzer_snptableuncertainty
2558-
file('structureGenotypes.tsv.gz') into ch_output_multivcfanalyzer_structuregenotypes
2559-
file('structureGenotypes_noMissingData-Columns.tsv.gz') into ch_output_multivcfanalyzer_structuregenotypesclean
2554+
file('fullAlignment.fasta.gz')
2555+
file('info.txt.gz')
2556+
file('snpAlignment.fasta.gz')
2557+
file('snpAlignmentIncludingRefGenome.fasta.gz')
2558+
file('snpStatistics.tsv.gz')
2559+
file('snpTable.tsv.gz')
2560+
file('snpTableForSnpEff.tsv.gz')
2561+
file('snpTableWithUncertaintyCalls.tsv.gz')
2562+
file('structureGenotypes.tsv.gz')
2563+
file('structureGenotypes_noMissingData-Columns.tsv.gz')
25602564
file('MultiVCFAnalyzer.json') optional true into ch_multivcfanalyzer_for_multiqc
25612565

25622566
script:
@@ -2649,8 +2653,8 @@ process sex_deterrmine {
26492653
script:
26502654
"""
26512655
samtools index ${input}
2652-
angsd -i ${input} -r ${params.contamination_chrom_name}:5000000-154900000 -doCounts 1 -iCounts 1 -minMapQ 30 -minQ 30 -out ${input.baseName}.doCounts
2653-
contamination -a ${input.baseName}.doCounts.icnts.gz -h ${baseDir}/assets/angsd_resources/HapMapChrX.gz 2> ${input.baseName}.X.contamination.out
2656+
angsd -i ${input} -r ${params.contamination_chrom_name}:5000000-154900000 -doCounts 1 -iCounts 1 -minMapQ 30 -minQ 30 -out ${libraryid}.doCounts
2657+
contamination -a ${libraryid}.doCounts.icnts.gz -h ${baseDir}/assets/angsd_resources/HapMapChrX.gz 2> ${libraryid}.X.contamination.out
26542658
"""
26552659
}
26562660

0 commit comments

Comments
 (0)