diff --git a/CHANGELOG.md b/CHANGELOG.md index 7b021fc67..c81da6bef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -42,10 +42,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. * [#473](https://github.com/nf-core/eager/issues/473) - Fixed bug in sexdet_process on AWS * [#444](https://github.com/nf-core/eager/issues/444) - Provide option for preserving realigned bam + index * Increase MultiQC process memory requirements to ensure enough memory for large runs +* Fixed deduplication output logic. When duplicate removal is not skipped, only the post-rmdup bams are now passed to the downstream per-library evaluation steps, instead of both the post-rmdup and pre-rmdup bams. * [#497](https://github.com/nf-core/eager/issues/497) - Simplifies number of parameters required to run bam filtering * [#501](https://github.com/nf-core/eager/issues/501) - Adds additional validation checks for MALT/MaltExtract database input files * [#508](https://github.com/nf-core/eager/issues/508) - Made Markduplicates default dedupper due to narrower context specificity of dedup * [#516](https://github.com/nf-core/eager/issues/516) - Made bedtools not report out of memory exit code when warning of inconsistant FASTA/Bed entry names +* Nuclear contamination is now reported with the correct library names. 
### `Dependencies` diff --git a/bin/print_x_contamination.py b/bin/print_x_contamination.py index af1adc42d..3cb544ac3 100755 --- a/bin/print_x_contamination.py +++ b/bin/print_x_contamination.py @@ -37,8 +37,8 @@ def make_float(x): Ind=re.sub('\.X.contamination.out$', '', fn).split("/")[-1] for line in f: fields=line.strip().split() - if line.strip()[0:19] == "We have nSNP sites:": - nSNPs=fields[4][:-1] + if line.strip()[0:21] == "[readicnts] Has read:": + nSNPs=fields[4] elif line.strip()[0:7] == "Method1" and line.strip()[9:16] == 'new_llh': mom1=fields[3].split(":")[1] err_mom1=fields[4].split(":")[1] diff --git a/main.nf b/main.nf index eeb7709d3..36b7177fd 100644 --- a/main.nf +++ b/main.nf @@ -1431,7 +1431,7 @@ process circularmapper{ file fasta from ch_fasta_for_circularmapper.collect() output: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.mapped.bam"), file("*.{bai,csi}") into ch_output_from_cm, ch_outputindex_from_cm + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.mapped.bam"), file("*.{bai,csi}") into ch_output_from_cm when: params.mapper == 'circularmapper' @@ -1711,7 +1711,7 @@ process samtools_filter { tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(bam), file(bai) from ch_seqtypemerged_for_samtools_filter output: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*filtered.bam"), file("*.{bai,csi}") into ch_output_from_filtering,ch_outputindex_from_filtering + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*filtered.bam"), file("*.{bai,csi}") into ch_output_from_filtering tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.unmapped.fastq.gz") optional true into ch_bam_filtering_for_metagenomic tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.unmapped.bam") optional true @@ -1899,9 +1899,13 @@ process markDup{ // This is for 
post-deduplcation per-library evaluation steps _without_ any // form of library merging. -ch_skiprmdup_for_libeval.mix(ch_dedup_for_libeval, ch_markdup_for_libeval) - .into{ ch_rmdup_for_preseq; ch_rmdup_for_damageprofiler; ch_for_nuclear_contamination; ch_rmdup_formtnucratio } - +if ( params.skip_deduplication ) { + ch_skiprmdup_for_libeval.mix(ch_dedup_for_libeval, ch_markdup_for_libeval) + .into{ ch_rmdup_for_preseq; ch_rmdup_for_damageprofiler; ch_for_nuclear_contamination; ch_rmdup_formtnucratio } +} else { + ch_dedup_for_libeval.mix(ch_markdup_for_libeval) + .into{ ch_rmdup_for_preseq; ch_rmdup_for_damageprofiler; ch_for_nuclear_contamination; ch_rmdup_formtnucratio } +} // Merge independent libraries sequenced but with same treatment (often done to improve complexity). Different strand/UDG libs not merged because bamtrim/pmdtools needs UDG info @@ -2540,16 +2544,16 @@ if (params.additional_vcf_files == '') { file fasta from ch_fasta_for_multivcfanalyzer.collect() output: - file('fullAlignment.fasta.gz') into ch_output_multivcfanalyzer_fullalignment - file('info.txt.gz') into ch_output_multivcfanalyzer_info - file('snpAlignment.fasta.gz') into ch_output_multivcfanalyzer_snpalignment - file('snpAlignmentIncludingRefGenome.fasta.gz') into ch_output_multivcfanalyzer_snpalignmentref - file('snpStatistics.tsv.gz') into ch_output_multivcfanalyzer_snpstatistics - file('snpTable.tsv.gz') into ch_output_multivcfanalyzer_snptable - file('snpTableForSnpEff.tsv.gz') into ch_output_multivcfanalyzer_snptablesnpeff - file('snpTableWithUncertaintyCalls.tsv.gz') into ch_output_multivcfanalyzer_snptableuncertainty - file('structureGenotypes.tsv.gz') into ch_output_multivcfanalyzer_structuregenotypes - file('structureGenotypes_noMissingData-Columns.tsv.gz') into ch_output_multivcfanalyzer_structuregenotypesclean + file('fullAlignment.fasta.gz') + file('info.txt.gz') + file('snpAlignment.fasta.gz') + file('snpAlignmentIncludingRefGenome.fasta.gz') + file('snpStatistics.tsv.gz') 
+ file('snpTable.tsv.gz') + file('snpTableForSnpEff.tsv.gz') + file('snpTableWithUncertaintyCalls.tsv.gz') + file('structureGenotypes.tsv.gz') + file('structureGenotypes_noMissingData-Columns.tsv.gz') file('MultiVCFAnalyzer.json') optional true into ch_multivcfanalyzer_for_multiqc script: @@ -2642,8 +2646,8 @@ process sex_deterrmine { script: """ samtools index ${input} - angsd -i ${input} -r ${params.contamination_chrom_name}:5000000-154900000 -doCounts 1 -iCounts 1 -minMapQ 30 -minQ 30 -out ${input.baseName}.doCounts - contamination -a ${input.baseName}.doCounts.icnts.gz -h ${baseDir}/assets/angsd_resources/HapMapChrX.gz 2> ${input.baseName}.X.contamination.out + angsd -i ${input} -r ${params.contamination_chrom_name}:5000000-154900000 -doCounts 1 -iCounts 1 -minMapQ 30 -minQ 30 -out ${libraryid}.doCounts + contamination -a ${libraryid}.doCounts.icnts.gz -h ${baseDir}/assets/angsd_resources/HapMapChrX.gz 2> ${libraryid}.X.contamination.out """ }