Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
* [#473](https://github.com/nf-core/eager/issues/473) - Fixed bug in sexdet_process on AWS
* [#444](https://github.com/nf-core/eager/issues/444) - Provide option for preserving realigned bam + index
* Increase MultiQC process memory requirements to ensure enough memory for large runs
* Fixed deduplication output logic. Will now pass along only the post-rmdup bams if duplicate removal is not skipped, instead of both the post-rmdup and pre-rmdup bams.
* [#497](https://github.com/nf-core/eager/issues/497) - Simplifies number of parameters required to run bam filtering
* [#501](https://github.com/nf-core/eager/issues/501) - Adds additional validation checks for MALT/MaltExtract database input files
* [#508](https://github.com/nf-core/eager/issues/508) - Made MarkDuplicates the default deduplication tool due to the narrower context specificity of DeDup
* [#516](https://github.com/nf-core/eager/issues/516) - Made bedtools not report out of memory exit code when warning of inconsistent FASTA/BED entry names
* Nuclear contamination is now reported with the correct library names.

### `Dependencies`

Expand Down
4 changes: 2 additions & 2 deletions bin/print_x_contamination.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ def make_float(x):
Ind=re.sub('\.X.contamination.out$', '', fn).split("/")[-1]
for line in f:
fields=line.strip().split()
if line.strip()[0:19] == "We have nSNP sites:":
nSNPs=fields[4][:-1]
if line.strip()[0:21] == "[readicnts] Has read:":
nSNPs=fields[4]
elif line.strip()[0:7] == "Method1" and line.strip()[9:16] == 'new_llh':
mom1=fields[3].split(":")[1]
err_mom1=fields[4].split(":")[1]
Expand Down
38 changes: 21 additions & 17 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -1431,7 +1431,7 @@ process circularmapper{
file fasta from ch_fasta_for_circularmapper.collect()

output:
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.mapped.bam"), file("*.{bai,csi}") into ch_output_from_cm, ch_outputindex_from_cm
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.mapped.bam"), file("*.{bai,csi}") into ch_output_from_cm

when:
params.mapper == 'circularmapper'
Expand Down Expand Up @@ -1711,7 +1711,7 @@ process samtools_filter {
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(bam), file(bai) from ch_seqtypemerged_for_samtools_filter

output:
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*filtered.bam"), file("*.{bai,csi}") into ch_output_from_filtering,ch_outputindex_from_filtering
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*filtered.bam"), file("*.{bai,csi}") into ch_output_from_filtering
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.unmapped.fastq.gz") optional true into ch_bam_filtering_for_metagenomic
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.unmapped.bam") optional true

Expand Down Expand Up @@ -1899,9 +1899,13 @@ process markDup{

// This is for post-deduplication per-library evaluation steps _without_ any
// form of library merging.
ch_skiprmdup_for_libeval.mix(ch_dedup_for_libeval, ch_markdup_for_libeval)
.into{ ch_rmdup_for_preseq; ch_rmdup_for_damageprofiler; ch_for_nuclear_contamination; ch_rmdup_formtnucratio }

if ( params.skip_deduplication ) {
ch_skiprmdup_for_libeval.mix(ch_dedup_for_libeval, ch_markdup_for_libeval)
.into{ ch_rmdup_for_preseq; ch_rmdup_for_damageprofiler; ch_for_nuclear_contamination; ch_rmdup_formtnucratio }
} else {
ch_dedup_for_libeval.mix(ch_markdup_for_libeval)
.into{ ch_rmdup_for_preseq; ch_rmdup_for_damageprofiler; ch_for_nuclear_contamination; ch_rmdup_formtnucratio }
}

// Merge independent libraries sequenced but with same treatment (often done to improve complexity). Different strand/UDG libs not merged because bamtrim/pmdtools needs UDG info

Expand Down Expand Up @@ -2540,16 +2544,16 @@ if (params.additional_vcf_files == '') {
file fasta from ch_fasta_for_multivcfanalyzer.collect()

output:
file('fullAlignment.fasta.gz') into ch_output_multivcfanalyzer_fullalignment
file('info.txt.gz') into ch_output_multivcfanalyzer_info
file('snpAlignment.fasta.gz') into ch_output_multivcfanalyzer_snpalignment
file('snpAlignmentIncludingRefGenome.fasta.gz') into ch_output_multivcfanalyzer_snpalignmentref
file('snpStatistics.tsv.gz') into ch_output_multivcfanalyzer_snpstatistics
file('snpTable.tsv.gz') into ch_output_multivcfanalyzer_snptable
file('snpTableForSnpEff.tsv.gz') into ch_output_multivcfanalyzer_snptablesnpeff
file('snpTableWithUncertaintyCalls.tsv.gz') into ch_output_multivcfanalyzer_snptableuncertainty
file('structureGenotypes.tsv.gz') into ch_output_multivcfanalyzer_structuregenotypes
file('structureGenotypes_noMissingData-Columns.tsv.gz') into ch_output_multivcfanalyzer_structuregenotypesclean
file('fullAlignment.fasta.gz')
file('info.txt.gz')
file('snpAlignment.fasta.gz')
file('snpAlignmentIncludingRefGenome.fasta.gz')
file('snpStatistics.tsv.gz')
file('snpTable.tsv.gz')
file('snpTableForSnpEff.tsv.gz')
file('snpTableWithUncertaintyCalls.tsv.gz')
file('structureGenotypes.tsv.gz')
file('structureGenotypes_noMissingData-Columns.tsv.gz')
file('MultiVCFAnalyzer.json') optional true into ch_multivcfanalyzer_for_multiqc

script:
Expand Down Expand Up @@ -2642,8 +2646,8 @@ process sex_deterrmine {
script:
"""
samtools index ${input}
angsd -i ${input} -r ${params.contamination_chrom_name}:5000000-154900000 -doCounts 1 -iCounts 1 -minMapQ 30 -minQ 30 -out ${input.baseName}.doCounts
contamination -a ${input.baseName}.doCounts.icnts.gz -h ${baseDir}/assets/angsd_resources/HapMapChrX.gz 2> ${input.baseName}.X.contamination.out
angsd -i ${input} -r ${params.contamination_chrom_name}:5000000-154900000 -doCounts 1 -iCounts 1 -minMapQ 30 -minQ 30 -out ${libraryid}.doCounts
contamination -a ${libraryid}.doCounts.icnts.gz -h ${baseDir}/assets/angsd_resources/HapMapChrX.gz 2> ${libraryid}.X.contamination.out
"""
}

Expand Down