Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.

* Added Sequencetools (1.4.0.6) that adds the ability to do genotyping with the 'pileupCaller'
* Latest version of DeDup (0.12.6) which now reports mapped reads after deduplication
* [#560] Latest version of DeDup (0.12.7), which now correctly reports deduplication statistics based on calculations of mapped reads only (prior denominator was total reads of BAM file)
* Latest version of ANGSD (0.933) which doesn't seg fault when running contamination on BAMs with insufficient reads
* Latest version of MultiQC (1.9) with support for lots of extra tools in the pipeline (MALT, SexDetERRmine, DamageProfiler, MultiVCFAnalyzer)
* Latest versions of Pygments (7.1), Pymdown-Extensions (2.6.1) and Markdown (3.2.2) for documentation output
Expand Down
6 changes: 6 additions & 0 deletions assets/multiqc_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,9 @@ table_columns_visible:
Method2_MOM_SE: False
Method2_ML_estimate: False
Method2_ML_SE: False
snp_coverage:
Covered_Snps: True
Total_Snps: False

table_columns_placement:
FastQC (pre-AdapterRemoval):
Expand Down Expand Up @@ -220,6 +223,9 @@ table_columns_placement:
Method2_MOM_SE: 1160
Method2_ML_estimate: 1170
Method2_ML_SE: 1180
snp_coverage:
Covered_Snps: 1050
Total_Snps: 1060
DeDup:
mapped_after_dedup: 620
clusterfactor: 630
Expand Down
29 changes: 29 additions & 0 deletions bin/parse_snp_cov.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/usr/bin/env python3
import sys, json
from collections import OrderedDict

_TXT_SUFFIX = ".txt"


def parse_snp_coverage(lines):
    """Parse a whitespace-delimited SNP coverage table into per-sample stats.

    Each data line must provide at least three columns: sample id, number of
    covered SNPs and total number of SNPs. Blank lines and lines whose first
    field starts with '#' (header/comments) are skipped.

    Args:
        lines: Iterable of text lines (e.g. an open file handle).

    Returns:
        OrderedDict mapping sample id to a dict with the keys
        "Covered_Snps" and "Total_Snps". Values are kept as the strings read
        from the table, in input order.

    Raises:
        ValueError: If a data line has fewer than three columns.
    """
    data = OrderedDict()
    for line_number, line in enumerate(lines, start=1):
        fields = line.split()
        # Filter before indexing so blank lines and short comment lines do
        # not raise IndexError.
        if not fields or fields[0].startswith("#"):
            continue
        if len(fields) < 3:
            raise ValueError(
                "line %d: expected at least 3 columns, found %d"
                % (line_number, len(fields))
            )
        sample_id, covered_snps, total_snps = fields[:3]
        data[sample_id] = {"Covered_Snps": covered_snps, "Total_Snps": total_snps}
    return data


def build_mqc_report(data):
    """Wrap per-sample stats in the 'generalstats' JSON structure that is dumped for MultiQC."""
    return {
        "plot_type": "generalstats",
        "id": "snp_coverage",
        "pconfig": {
            "Covered_Snps": {"title": "#SNPs Covered"},
            "Total_Snps": {"title": "#SNPs Total"},
        },
        "data": data,
    }


def mqc_json_path(table_path):
    """Return the output path: *table_path* minus a trailing '.txt', plus '_mqc.json'.

    str.rstrip('.txt') must not be used here: it strips any trailing run of
    the characters '.', 't' and 'x', so 'mixt.txt' would become 'mi'.
    """
    if table_path.endswith(_TXT_SUFFIX):
        table_path = table_path[: -len(_TXT_SUFFIX)]
    return table_path + "_mqc.json"


def main(argv=None):
    """Convert the coverage table named by the first argument to '<name>_mqc.json'.

    Args:
        argv: Argument list without the program name; defaults to
            sys.argv[1:].
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        sys.exit("Usage: parse_snp_cov.py <snp_coverage_table.txt>")
    table_path = args[0]

    # Context managers guarantee both handles are closed, even on error.
    with open(table_path, "r") as table:
        data = parse_snp_coverage(table)

    with open(mqc_json_path(table_path), "w") as outfile:
        json.dump(build_mqc_report(data), outfile)


if __name__ == "__main__":
    main()
4 changes: 3 additions & 1 deletion bin/scrape_software_versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@
'MTNucRatioCalculator':['v_mtnucratiocalculator.txt',r"Version: (\S+)"],
'VCF2genome':['v_vcf2genome.txt', r"VCF2Genome \(v. ([0-9].[0-9]+) "],
'endorS.py':['v_endorSpy.txt', r"endorS.py (\S+)"],
'kraken':['v_kraken.txt', r"Kraken version (\S+)"]
'kraken':['v_kraken.txt', r"Kraken version (\S+)"],
'eigenstrat_snp_coverage':['v_eigenstrat_snp_coverage.txt',r"(\S+)"]
}

results = OrderedDict()
Expand Down Expand Up @@ -69,6 +70,7 @@
results['malt'] = '<span style="color:#999999;\">N/A</span>'
results['kraken'] = '<span style="color:#999999;\">N/A</span>'
results['maltextract'] = '<span style="color:#999999;\">N/A</span>'
results['eigenstrat_snp_coverage'] = '<span style="color:#999999;\">N/A</span>'

# Search each file using its regex
for k, v in regexes.items():
Expand Down
5 changes: 3 additions & 2 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ dependencies:
- bioconda::bwa=0.7.17
- bioconda::picard=2.22.9
- bioconda::samtools=1.9
- bioconda::dedup=0.12.6
- bioconda::dedup=0.12.7
- bioconda::angsd=0.933
- bioconda::circularmapper=1.93.5
- bioconda::gatk4=4.1.7.0
Expand All @@ -33,7 +33,7 @@ dependencies:
- bioconda::fastp=0.20.1
- bioconda::bamutil=1.0.14
- bioconda::mtnucratio=0.7
- pysam=0.15.4 #Says python3.7 or less
- bioconda::pysam=0.15.4 #Says python3.7 or less
- bioconda::kraken2=2.0.9beta
- conda-forge::pandas=1.0.4 #.4 is python3.8+ compatible
- bioconda::freebayes=1.3.2 #should be fine with python 3.8, but says <3.7 on webpage
Expand All @@ -43,4 +43,5 @@ dependencies:
- conda-forge::biopython=1.76
- conda-forge::xopen=0.9.0
- bioconda::bowtie2=2.4.1
- bioconda::eigenstratdatabasetools=1.0.2
#Missing Schmutzi,snpAD
31 changes: 29 additions & 2 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2599,7 +2599,7 @@ if (params.pileupcaller_snpfile.isEmpty ()) {
path(snp) from ch_snp_for_pileupcaller.collect().dump(tag: "Pileupcaller SNP file")

output:
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("pileupcaller.${strandedness}.*")
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("pileupcaller.${strandedness}.*") into ch_for_eigenstrat_snp_coverage

script:
def use_bed = bed.getName() != 'nf-core_eager_dummy.txt' ? "-l ${bed}" : ''
Expand All @@ -2614,7 +2614,32 @@ if (params.pileupcaller_snpfile.isEmpty ()) {
samtools mpileup -B -q 30 -Q 30 ${use_bed} -f ${fasta} ${bam_list} | pileupCaller ${caller} ${ssmode} ${transitions_mode} --sampleNames ${sample_names} ${use_snp} -e pileupcaller.${strandedness}
"""
}


// Runs `eigenstrat_snp_coverage` on the `pileupcaller.${strandedness}` output
// prefix, saves the table as `${strandedness}_eigenstrat_coverage.txt`, then
// passes that table to `parse_snp_cov.py`.
process eigenstrat_snp_coverage {
label 'mc_tiny'
tag "${strandedness}"
// Declared outputs are published under <outdir>/genotyping.
publishDir "${params.outdir}/genotyping", mode: params.publish_dir_mode

// Only runs when genotyping is enabled and pileupcaller is the selected tool.
when:
params.run_genotyping && params.genotyping_tool == 'pileupcaller'

// Metadata tuple plus staged files taken from ch_for_eigenstrat_snp_coverage.
input:
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*") from ch_for_eigenstrat_snp_coverage.dump()

// The *.json file (presumably the *_mqc.json written by parse_snp_cov.py --
// verify against that script) is emitted with the metadata for MultiQC; the
// plain-text table is declared as an output without a target channel.
output:
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*.json") into ch_eigenstrat_snp_cov_for_multiqc
path("*_eigenstrat_coverage.txt")

script:
// Disabled alternative invocation that passes `-j` for the JSON file directly.
/* """
eigenstrat_snp_coverage -i pileupcaller.${strandedness} -s ".txt" >${strandedness}_eigenstrat_coverage.txt -j ${strandedness}_eigenstrat_coverage_mqc.json
"""*/
Comment thread
jfy133 marked this conversation as resolved.
"""
eigenstrat_snp_coverage -i pileupcaller.${strandedness} -s ".txt" >${strandedness}_eigenstrat_coverage.txt
parse_snp_cov.py ${strandedness}_eigenstrat_coverage.txt
"""
}

process genotyping_angsd {
label 'mc_small'
tag "${samplename}"
Expand Down Expand Up @@ -3139,6 +3164,7 @@ process get_software_versions {
endorS.py --version &> v_endorSpy.txt || true
pileupCaller --version &> v_sequencetools.txt 2>&1 || true
bowtie2 --version | grep -a 'bowtie2-.* -fdebug' > v_bowtie2.txt || true
eigenstrat_snp_coverage --version | cut -d ' ' -f2 >v_eigenstrat_snp_coverage.txt || true

scrape_software_versions.py &> software_versions_mqc.yaml
"""
Expand Down Expand Up @@ -3176,6 +3202,7 @@ process multiqc {
file ('kraken/*') from ch_kraken_for_multiqc.collect().ifEmpty([])
file ('hops/*') from ch_hops_for_multiqc.collect().ifEmpty([])
file ('nuclear_contamination/*') from ch_nuclear_contamination_for_multiqc.collect().ifEmpty([])
file ('genotyping/*') from ch_eigenstrat_snp_cov_for_multiqc

file workflow_summary from ch_workflow_summary.collectFile(name: "workflow_summary_mqc.yaml")

Expand Down