nf-core · jfy133 · Jul 23, 2020 · Jul 22, 2020 · Jul 22, 2020 · Jul 22, 2020
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -27,6 +27,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 * [#451] Adds ANGSD genotype likelihood calculations as alternative to typical 'genotypers'
 * [#504] Removed sexdeterrmine-snps plot from MultiQC report.
 * Nuclear contamination results are now shown in the MultiQC report.
+* Nuclear contamination is now reported with the correct library names.
 * Tutorial on how to use profiles for reproducible science (i.e. parameter sharing between different groups)
 
 ### `Fixed`
@@ -42,6 +43,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 * [#473](https://github.com/nf-core/eager/issues/473) - Fixed bug in sexdet_process on AWS
 * [#444](https://github.com/nf-core/eager/issues/444) - Provide option for preserving realigned bam + index
 * Increase MultiQC process memory requirements to ensure enough memory for large runs
+* Fixed deduplication output logic: when duplicate removal is not skipped, only the post-rmdup BAMs are now passed downstream, instead of both the pre-rmdup and post-rmdup BAMs.
 * [#497](https://github.com/nf-core/eager/issues/497) - Simplifies number of parameters required to run bam filtering
 * [#501](https://github.com/nf-core/eager/issues/501) - Adds additional validation checks for MALT/MaltExtract database input files
 * [#508](https://github.com/nf-core/eager/issues/508) - Made Markduplicates default dedupper due to narrower context specificity of dedup

diff --git a/bin/print_x_contamination.py b/bin/print_x_contamination.py
@@ -37,8 +37,8 @@ def make_float(x):
         Ind=re.sub('\.X.contamination.out$', '', fn).split("/")[-1]
         for line in f:
             fields=line.strip().split()
-            if line.strip()[0:19] == "We have nSNP sites:":
-                nSNPs=fields[4][:-1]
+            if line.strip()[0:21] == "[readicnts] Has read:":
+                nSNPs=fields[4]
             elif line.strip()[0:7] == "Method1" and line.strip()[9:16] == 'new_llh':
                 mom1=fields[3].split(":")[1]
                 err_mom1=fields[4].split(":")[1]

diff --git a/main.nf b/main.nf
@@ -1431,7 +1431,7 @@ process circularmapper{
     file fasta from ch_fasta_for_circularmapper.collect()
 
     output:
-    tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.mapped.bam"), file("*.{bai,csi}") into ch_output_from_cm, ch_outputindex_from_cm
+    tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.mapped.bam"), file("*.{bai,csi}") into ch_output_from_cm
 
     when: 
     params.mapper == 'circularmapper'
@@ -1711,7 +1711,7 @@ process samtools_filter {
     tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(bam), file(bai) from ch_seqtypemerged_for_samtools_filter
 
     output:
-    tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*filtered.bam"), file("*.{bai,csi}") into ch_output_from_filtering,ch_outputindex_from_filtering
+    tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*filtered.bam"), file("*.{bai,csi}") into ch_output_from_filtering
     tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.unmapped.fastq.gz") optional true into ch_bam_filtering_for_metagenomic
     tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.unmapped.bam") optional true
 
@@ -1899,9 +1899,13 @@ process markDup{
 
 // This is for post-deduplcation per-library evaluation steps _without_ any 
 // form of library merging. 
-ch_skiprmdup_for_libeval.mix(ch_dedup_for_libeval, ch_markdup_for_libeval)
-  .into{ ch_rmdup_for_preseq; ch_rmdup_for_damageprofiler; ch_for_nuclear_contamination; ch_rmdup_formtnucratio }
-
+if ( params.skip_deduplication ) {
+  ch_skiprmdup_for_libeval.mix(ch_dedup_for_libeval, ch_markdup_for_libeval)
+    .into{ ch_rmdup_for_preseq; ch_rmdup_for_damageprofiler; ch_for_nuclear_contamination; ch_rmdup_formtnucratio }
+} else {
+  ch_dedup_for_libeval.mix(ch_markdup_for_libeval)
+    .into{ ch_rmdup_for_preseq; ch_rmdup_for_damageprofiler; ch_for_nuclear_contamination; ch_rmdup_formtnucratio }
+}
 
 // Merge independent libraries sequenced but with same treatment (often done to improve complexity). Different strand/UDG libs not merged because bamtrim/pmdtools needs UDG info
 
@@ -2540,16 +2544,16 @@ if (params.additional_vcf_files == '') {
   file fasta from ch_fasta_for_multivcfanalyzer.collect()
 
   output:
-  file('fullAlignment.fasta.gz') into ch_output_multivcfanalyzer_fullalignment
-  file('info.txt.gz') into ch_output_multivcfanalyzer_info
-  file('snpAlignment.fasta.gz') into ch_output_multivcfanalyzer_snpalignment
-  file('snpAlignmentIncludingRefGenome.fasta.gz') into ch_output_multivcfanalyzer_snpalignmentref
-  file('snpStatistics.tsv.gz') into ch_output_multivcfanalyzer_snpstatistics
-  file('snpTable.tsv.gz') into ch_output_multivcfanalyzer_snptable
-  file('snpTableForSnpEff.tsv.gz') into ch_output_multivcfanalyzer_snptablesnpeff
-  file('snpTableWithUncertaintyCalls.tsv.gz') into ch_output_multivcfanalyzer_snptableuncertainty
-  file('structureGenotypes.tsv.gz') into ch_output_multivcfanalyzer_structuregenotypes
-  file('structureGenotypes_noMissingData-Columns.tsv.gz') into ch_output_multivcfanalyzer_structuregenotypesclean
+  file('fullAlignment.fasta.gz')
+  file('info.txt.gz')
+  file('snpAlignment.fasta.gz')
+  file('snpAlignmentIncludingRefGenome.fasta.gz')
+  file('snpStatistics.tsv.gz')
+  file('snpTable.tsv.gz')
+  file('snpTableForSnpEff.tsv.gz')
+  file('snpTableWithUncertaintyCalls.tsv.gz')
+  file('structureGenotypes.tsv.gz')
+  file('structureGenotypes_noMissingData-Columns.tsv.gz')
   file('MultiVCFAnalyzer.json') optional true into ch_multivcfanalyzer_for_multiqc
 
   script:
@@ -2642,8 +2646,8 @@ process sex_deterrmine {
     script:
     """
     samtools index ${input}
-    angsd -i ${input} -r ${params.contamination_chrom_name}:5000000-154900000 -doCounts 1 -iCounts 1 -minMapQ 30 -minQ 30 -out ${input.baseName}.doCounts
-    contamination -a ${input.baseName}.doCounts.icnts.gz -h ${baseDir}/assets/angsd_resources/HapMapChrX.gz 2> ${input.baseName}.X.contamination.out
+    angsd -i ${input} -r ${params.contamination_chrom_name}:5000000-154900000 -doCounts 1 -iCounts 1 -minMapQ 30 -minQ 30 -out ${libraryid}.doCounts
+    contamination -a ${libraryid}.doCounts.icnts.gz -h ${baseDir}/assets/angsd_resources/HapMapChrX.gz 2> ${libraryid}.X.contamination.out
     """
  }