Skip to content

Commit 7440cbb

Browse files
authored
Merge pull request #507 from nf-core/docs-updates
A few tweaks to profile tutorial
2 parents 5702d88 + c6b547f commit 7440cbb

16 files changed

Lines changed: 4919 additions & 5216 deletions

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
2525
* Added large 'fullsize' dataset test-profiles for ancient fish, human, and (as a draft) pathogen contexts.
2626
* [#257](https://github.com/nf-core/eager/issues/257) Added the bowtie2 aligner as option for mapping, following Poullet and Orlando 2020 doi: [10.3389/fevo.2020.00105](https://doi.org/10.3389/fevo.2020.00105)
2727
* [#451] Adds ANGSD genotype likelihood calculations as alternative to typical 'genotypers'
28+
* [#504] Removed sexdeterrmine-snps plot from MultiQC report.
29+
* Nuclear contamination results are now shown in the MultiQC report.
30+
* Tutorial on how to use profiles for reproducible science (i.e. parameter sharing between different groups)
2831

2932
### `Fixed`
3033

@@ -58,6 +61,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
5861
* Latest version of FreeBayes (1.3.2)
5962
* Latest version of xopen (0.9.0)
6063
* Added Bowtie 2 (2.4.1)
64+
* Latest version of Sex.DetERRmine (1.1.2)
6165
* Latest version of endorS.py (0.3)
6266

6367
## [2.1.0] - 2020-03-05 - "Ravensburg"

assets/multiqc_config.yaml

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,9 @@ qualimap_config:
104104
- 4
105105
- 5
106106

107+
remove_sections:
108+
- sexdeterrmine-snps
109+
107110
table_columns_visible:
108111
FastQC (pre-AdapterRemoval):
109112
percent_duplicates: False
@@ -159,6 +162,19 @@ table_columns_visible:
159162
percentage_aligned: False
160163
MultiVCFAnalyzer:
161164
Heterozygous SNP alleles (percent): True
165+
custom_content:
166+
endogenous_dna: True
167+
endogenous_dna_post: True
168+
nuclear_contamination:
169+
Num_SNPs: True
170+
Method1_MOM_estimate: False
171+
Method1_MOM_SE: False
172+
Method1_ML_estimate: True
173+
Method1_ML_SE: True
174+
Method2_MOM_estimate: False
175+
Method2_MOM_SE: False
176+
Method2_ML_estimate: False
177+
Method2_ML_SE: False
162178

163179
table_columns_placement:
164180
FastQC (pre-AdapterRemoval):
@@ -192,6 +208,15 @@ table_columns_placement:
192208
custom_content:
193209
endogenous_dna: 600
194210
endogenous_dna_post: 610
211+
Num_SNPs: 1100
212+
Method1_MOM_estimate: 1110
213+
Method1_MOM_SE: 1120
214+
Method1_ML_estimate: 1130
215+
Method1_ML_SE: 1140
216+
Method2_MOM_estimate: 1150
217+
Method2_MOM_SE: 1160
218+
Method2_ML_estimate: 1170
219+
Method2_ML_SE: 1180
195220
DeDup:
196221
mapped_after_dedup: 620
197222
clusterfactor: 630
@@ -218,10 +243,10 @@ table_columns_placement:
218243
5_x_pc: 860
219244
avg_gc: 870
220245
sexdeterrmine:
221-
RateX: 100
246+
RateX: 1000
222247
RateY: 1010
223248
MultiVCFAnalyzer:
224-
Heterozygous SNP alleles (percent): 1100
249+
Heterozygous SNP alleles (percent): 1200
225250
read_count_multiplier: 1
226251
read_count_prefix: ''
227252
read_count_desc: ''

bin/print_x_contamination.py

Lines changed: 40 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,26 @@
22
import sys, re, json
33
from collections import OrderedDict
44

5+
jsonOut=OrderedDict()
56
data=OrderedDict()
67

8+
## Function to convert a set of elements into floating point numbers, when possible, else leave them be.
9+
def make_float(x):
    """Convert each element of *x* to a float where possible.

    Elements that cannot be parsed as a number (e.g. the "N/A" placeholder)
    are passed through unchanged. Any element that parses to NaN ("-nan",
    "nan", "NaN", ...) is replaced with the string "N/A", because NaN is not
    a valid value in JSON.

    Args:
        x: A sequence of values, typically strings.

    Returns:
        A tuple of the same length as *x* holding floats, "N/A" for NaN
        values, or the original element when it is not numeric.
    """
    # Function-scope import so the block does not depend on the file header.
    import math

    output = []
    for value in x:
        try:
            number = float(value)
        except (TypeError, ValueError, OverflowError):
            # Not a number: leave the element exactly as it was.
            output.append(value)
            continue
        # float() happily parses "-nan"/"nan"; map every NaN to "N/A".
        output.append("N/A" if math.isnan(number) else number)
    return tuple(output)
23+
24+
725
Input_files=sys.argv[1:]
826

927
output = open("nuclear_contamination.txt", 'w')
@@ -16,7 +34,7 @@
1634
ml2, err_ml2="N/A","N/A"
1735
with open(fn, 'r') as f:
1836
Estimates={}
19-
Ind=re.sub('\.X.contamination.out$', '', fn)
37+
Ind=re.sub('\.X.contamination.out$', '', fn).split("/")[-1]
2038
for line in f:
2139
fields=line.strip().split()
2240
if line.strip()[0:19] == "We have nSNP sites:":
@@ -26,16 +44,33 @@
2644
err_mom1=fields[4].split(":")[1]
2745
ml1=fields[5].split(":")[1]
2846
err_ml1=fields[6].split(":")[1]
29-
## Sometimes angsd fails to run method 2, and the error is printed directly after the SE for ML. When that happens, exclude the first word in the error from the output. (Method 2 data will be shown as NA)
47+
## Sometimes angsd fails to run method 2, and the error is printed directly after the SE for ML. When that happens, exclude the first word in the error from the output. (Method 2 values will then be shown as N/A)
3048
if err_ml1.endswith("contamination"):
3149
err_ml1 = err_ml1[:-13]
3250
elif line.strip()[0:7] == "Method2" and line.strip()[9:16] == 'new_llh':
3351
mom2=fields[3].split(":")[1]
3452
err_mom2=fields[4].split(":")[1]
3553
ml2=fields[5].split(":")[1]
3654
err_ml2=fields[6].split(":")[1]
37-
data[Ind]={ "Number_of_SNPs" : nSNPs, "Method1_MOM_estimate" : mom1, "Method1_MOM_SE" : err_mom1, "Method1_ML_estimate" : ml1, "Method1_ML_SE" : err_ml1, "Method2_MOM_estimate" : mom2, "Method2_MOM_SE" : err_mom2, "Method2_ML_estimate" : ml2, "Method2_ML_SE" : err_ml2 }
55+
## Convert estimates and errors to floating point numbers
56+
(ml1, err_ml1, mom1, err_mom1, ml2, err_ml2, mom2, err_mom2) = make_float((ml1, err_ml1, mom1, err_mom1, ml2, err_ml2, mom2, err_mom2))
57+
data[Ind]={ "Num_SNPs" : int(nSNPs), "Method1_MOM_estimate" : mom1, "Method1_MOM_SE" : err_mom1, "Method1_ML_estimate" : ml1, "Method1_ML_SE" : err_ml1, "Method2_MOM_estimate" : mom2, "Method2_MOM_SE" : err_mom2, "Method2_ML_estimate" : ml2, "Method2_ML_SE" : err_ml2 }
3858
print (Ind, nSNPs, mom1, err_mom1, ml1, err_ml1, mom2, err_mom2, ml2, err_ml2, sep="\t", file=output)
3959

40-
with open('nuclear_contamination.json', 'w') as outfile:
41-
json.dump(data, outfile)
60+
61+
jsonOut = {"plot_type": "generalstats", "id": "nuclear_contamination",
62+
"pconfig": {
63+
"Num_SNPs" : {"title" : "Number of SNPs"},
64+
"Method1_MOM_estimate" : {"title": "Contamination Estimate (Method1_MOM)"},
65+
"Method1_MOM_SE" : {"title": "Estimate Error (Method1_MOM)"},
66+
"Method1_ML_estimate" : {"title": "Contamination Estimate (Method1_ML)"},
67+
"Method1_ML_SE" : {"title": "Estimate Error (Method1_ML)"},
68+
"Method2_MOM_estimate" : {"title": "Contamination Estimate (Method2_MOM)"},
69+
"Method2_MOM_SE" : {"title": "Estimate Error (Method2_MOM)"},
70+
"Method2_ML_estimate" : {"title": "Contamination Estimate (Method2_ML)"},
71+
"Method2_ML_SE" : {"title": "Estimate Error (Method2_ML)"}
72+
},
73+
"data" : data
74+
}
75+
with open('nuclear_contamination_mqc.json', 'w') as outfile:
76+
json.dump(jsonOut, outfile)
-448 KB
Loading

0 commit comments

Comments
 (0)