Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
* [#451] Adds ANGSD genotype likelihood calculations as alternative to typical 'genotypers'
* [#504] Removed sexdeterrmine-snps plot from MultiQC report.
* Nuclear contamination results are now shown in the MultiQC report.
* Nuclear contamination is now reported with the correct library names.
Comment thread
TCLamnidis marked this conversation as resolved.
Outdated
* Tutorial on how to use profiles for reproducible science (i.e. parameter sharing between different groups)

### `Fixed`
Expand All @@ -42,6 +43,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
* [#473](https://github.com/nf-core/eager/issues/473) - Fixed bug in sexdet_process on AWS
* [#444](https://github.com/nf-core/eager/issues/444) - Provide option for preserving realigned bam + index
* Increase MultiQC process memory requirements to ensure enough memory for large runs
* Fixed deduplication output logic. Will now pass along only the post-rmdup bams if duplicate removal is not skipped, instead of both the post-rmdup and pre-rmdup bams.
* [#497](https://github.com/nf-core/eager/issues/497) - Simplifies number of parameters required to run bam filtering
* [#501](https://github.com/nf-core/eager/issues/501) - Adds additional validation checks for MALT/MaltExtract database input files
* [#508](https://github.com/nf-core/eager/issues/508) - Made MarkDuplicates the default deduplication tool due to the narrower context specificity of DeDup
Expand Down
4 changes: 2 additions & 2 deletions bin/print_x_contamination.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ def make_float(x):
Ind=re.sub('\.X.contamination.out$', '', fn).split("/")[-1]
for line in f:
fields=line.strip().split()
if line.strip()[0:19] == "We have nSNP sites:":
nSNPs=fields[4][:-1]
if line.strip()[0:21] == "[readicnts] Has read:":
nSNPs=fields[4]
elif line.strip()[0:7] == "Method1" and line.strip()[9:16] == 'new_llh':
mom1=fields[3].split(":")[1]
err_mom1=fields[4].split(":")[1]
Expand Down
38 changes: 21 additions & 17 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -1431,7 +1431,7 @@ process circularmapper{
file fasta from ch_fasta_for_circularmapper.collect()

output:
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.mapped.bam"), file("*.{bai,csi}") into ch_output_from_cm, ch_outputindex_from_cm
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.mapped.bam"), file("*.{bai,csi}") into ch_output_from_cm

when:
params.mapper == 'circularmapper'
Expand Down Expand Up @@ -1711,7 +1711,7 @@ process samtools_filter {
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(bam), file(bai) from ch_seqtypemerged_for_samtools_filter

output:
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*filtered.bam"), file("*.{bai,csi}") into ch_output_from_filtering,ch_outputindex_from_filtering
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*filtered.bam"), file("*.{bai,csi}") into ch_output_from_filtering
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.unmapped.fastq.gz") optional true into ch_bam_filtering_for_metagenomic
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.unmapped.bam") optional true

Expand Down Expand Up @@ -1899,9 +1899,13 @@ process markDup{

// This is for post-deduplication per-library evaluation steps _without_ any
// form of library merging.
ch_skiprmdup_for_libeval.mix(ch_dedup_for_libeval, ch_markdup_for_libeval)
.into{ ch_rmdup_for_preseq; ch_rmdup_for_damageprofiler; ch_for_nuclear_contamination; ch_rmdup_formtnucratio }

if ( params.skip_deduplication ) {
ch_skiprmdup_for_libeval.mix(ch_dedup_for_libeval, ch_markdup_for_libeval)
.into{ ch_rmdup_for_preseq; ch_rmdup_for_damageprofiler; ch_for_nuclear_contamination; ch_rmdup_formtnucratio }
} else {
ch_dedup_for_libeval.mix(ch_markdup_for_libeval)
.into{ ch_rmdup_for_preseq; ch_rmdup_for_damageprofiler; ch_for_nuclear_contamination; ch_rmdup_formtnucratio }
}

// Merge independent libraries that were sequenced but have the same treatment (often done to improve complexity). Libraries with different strandedness/UDG treatment are not merged because bamtrim/pmdtools need the UDG info.

Expand Down Expand Up @@ -2540,16 +2544,16 @@ if (params.additional_vcf_files == '') {
file fasta from ch_fasta_for_multivcfanalyzer.collect()

output:
file('fullAlignment.fasta.gz') into ch_output_multivcfanalyzer_fullalignment
file('info.txt.gz') into ch_output_multivcfanalyzer_info
file('snpAlignment.fasta.gz') into ch_output_multivcfanalyzer_snpalignment
file('snpAlignmentIncludingRefGenome.fasta.gz') into ch_output_multivcfanalyzer_snpalignmentref
file('snpStatistics.tsv.gz') into ch_output_multivcfanalyzer_snpstatistics
file('snpTable.tsv.gz') into ch_output_multivcfanalyzer_snptable
file('snpTableForSnpEff.tsv.gz') into ch_output_multivcfanalyzer_snptablesnpeff
file('snpTableWithUncertaintyCalls.tsv.gz') into ch_output_multivcfanalyzer_snptableuncertainty
file('structureGenotypes.tsv.gz') into ch_output_multivcfanalyzer_structuregenotypes
file('structureGenotypes_noMissingData-Columns.tsv.gz') into ch_output_multivcfanalyzer_structuregenotypesclean
file('fullAlignment.fasta.gz')
file('info.txt.gz')
file('snpAlignment.fasta.gz')
file('snpAlignmentIncludingRefGenome.fasta.gz')
file('snpStatistics.tsv.gz')
file('snpTable.tsv.gz')
file('snpTableForSnpEff.tsv.gz')
file('snpTableWithUncertaintyCalls.tsv.gz')
file('structureGenotypes.tsv.gz')
file('structureGenotypes_noMissingData-Columns.tsv.gz')
file('MultiVCFAnalyzer.json') optional true into ch_multivcfanalyzer_for_multiqc

script:
Expand Down Expand Up @@ -2642,8 +2646,8 @@ process sex_deterrmine {
script:
"""
samtools index ${input}
angsd -i ${input} -r ${params.contamination_chrom_name}:5000000-154900000 -doCounts 1 -iCounts 1 -minMapQ 30 -minQ 30 -out ${input.baseName}.doCounts
contamination -a ${input.baseName}.doCounts.icnts.gz -h ${baseDir}/assets/angsd_resources/HapMapChrX.gz 2> ${input.baseName}.X.contamination.out
angsd -i ${input} -r ${params.contamination_chrom_name}:5000000-154900000 -doCounts 1 -iCounts 1 -minMapQ 30 -minQ 30 -out ${libraryid}.doCounts
contamination -a ${libraryid}.doCounts.icnts.gz -h ${baseDir}/assets/angsd_resources/HapMapChrX.gz 2> ${libraryid}.X.contamination.out
"""
}

Expand Down