nf-core
diff --git a/‎CHANGELOG.md‎
Lines changed: 4 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎assets/multiqc_config.yaml‎
Lines changed: 27 additions & 2 deletions b/‎assets/multiqc_config.yaml‎
Lines changed: 27 additions & 2 deletions
diff --git a/‎bin/print_x_contamination.py‎
Lines changed: 40 additions & 5 deletions b/‎bin/print_x_contamination.py‎
Lines changed: 40 additions & 5 deletions
diff --git a/‎docs/images/output/overview/eager2_workflow.png‎
-448 KB b/‎docs/images/output/overview/eager2_workflow.png‎
-448 KB
@@ -25,6 +25,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 * Added large 'fullsize' dataset test-profiles for ancient fish, human, and draft pathogen contexts.
 * [#257](https://github.com/nf-core/eager/issues/257) Added the bowtie2 aligner as an option for mapping, following Poullet and Orlando 2020 doi: [10.3389/fevo.2020.00105](https://doi.org/10.3389/fevo.2020.00105)
 * [#451] Added ANGSD genotype likelihood calculations as an alternative to typical 'genotypers'
+* [#504] Removed sexdeterrmine-snps plot from MultiQC report.
+* Nuclear contamination results are now shown in the MultiQC report.
+* Added a tutorial on how to use profiles for reproducible science (i.e. parameter sharing between different groups)
 
 ### `Fixed`
 
@@ -58,6 +61,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 * Latest version of FreeBayes (1.3.2)
 * Latest version of xopen (0.9.0)
 * Added Bowtie 2 (2.4.1)
+* Latest version of Sex.DetERRmine (1.1.2)
 * Latest version of endorS.py (0.3)
 
 ## [2.1.0] - 2020-03-05 - "Ravensburg"
 
@@ -104,6 +104,9 @@ qualimap_config:
         - 4
         - 5
 
+remove_sections:
+  - sexdeterrmine-snps
+
 table_columns_visible:
     FastQC (pre-AdapterRemoval):
         percent_duplicates: False
@@ -159,6 +162,19 @@ table_columns_visible:
         percentage_aligned: False
     MultiVCFAnalyzer:
         Heterozygous SNP alleles (percent): True
+    custom_content:
+        endogenous_dna: True
+        endogenous_dna_post: True
+    nuclear_contamination:
+        Num_SNPs: True
+        Method1_MOM_estimate: False
+        Method1_MOM_SE: False
+        Method1_ML_estimate: True
+        Method1_ML_SE: True
+        Method2_MOM_estimate: False
+        Method2_MOM_SE: False
+        Method2_ML_estimate: False
+        Method2_ML_SE: False
 
 table_columns_placement:
     FastQC (pre-AdapterRemoval):
@@ -192,6 +208,15 @@ table_columns_placement:
     custom_content:
         endogenous_dna: 600
         endogenous_dna_post: 610
+        Num_SNPs: 1100
+        Method1_MOM_estimate: 1110
+        Method1_MOM_SE: 1120
+        Method1_ML_estimate: 1130
+        Method1_ML_SE: 1140
+        Method2_MOM_estimate: 1150
+        Method2_MOM_SE: 1160
+        Method2_ML_estimate: 1170
+        Method2_ML_SE: 1180
     DeDup:
         mapped_after_dedup: 620
         clusterfactor: 630
@@ -218,10 +243,10 @@ table_columns_placement:
         5_x_pc: 860
         avg_gc: 870
     sexdeterrmine:
-        RateX: 100
+        RateX: 1000
         RateY: 1010
     MultiVCFAnalyzer:
-        Heterozygous SNP alleles (percent): 1100
+        Heterozygous SNP alleles (percent): 1200
 read_count_multiplier: 1
 read_count_prefix: ''
 read_count_desc: ''
 
@@ -2,8 +2,26 @@
 import sys, re, json
 from collections import OrderedDict
 
+jsonOut=OrderedDict()
 data=OrderedDict()
 
+## Convert each element of the sequence x to a floating point number, when possible.
+## Returns a tuple with the same length as x: elements equal to "-nan" become the
+## string "N/A", elements that float() accepts become floats, and anything else is
+## passed through unchanged.
+def make_float(x):
+    ## NOTE(review): this prints the raw input to stdout on every call; it looks like leftover debug output -- confirm whether it is intended.
+    print (x)
+    output=[None for i in range(len(x))]
+    ## If value for an estimate/error is -nan, replace with "N/A". JSON does not accept NaN as a valid field.
+    ## NOTE(review): only the exact string "-nan" is special-cased; other spellings that float() accepts (e.g. "nan") would still become a float NaN.
+    for i in range(len(x)):
+        if x[i] == "-nan":
+            output[i]="N/A"
+            continue
+        try:
+            output[i]=float(x[i])
+        ## Bare except: whenever float() fails (e.g. on an "N/A" placeholder string) the original value is kept as-is.
+        except:
+            output[i]=x[i]
+    
+    return(tuple(output))
+
+
 Input_files=sys.argv[1:]
 
 output = open("nuclear_contamination.txt", 'w')
@@ -16,7 +34,7 @@
     ml2, err_ml2="N/A","N/A"
     with open(fn, 'r') as f:
         Estimates={}
-        Ind=re.sub('\.X.contamination.out$', '', fn)
+        Ind=re.sub('\.X.contamination.out$', '', fn).split("/")[-1]
         for line in f:
             fields=line.strip().split()
             if line.strip()[0:19] == "We have nSNP sites:":
@@ -26,16 +44,33 @@
                 err_mom1=fields[4].split(":")[1]
                 ml1=fields[5].split(":")[1]
                 err_ml1=fields[6].split(":")[1]
-                ## Sometimes angsd fails to run method 2, and the error is printed directly after the SE for ML. When that happens, exclude the first word in the error from the output. (Method 2 data will be shown as NA)
+                ## Sometimes angsd fails to run method 2, and the error is printed directly after the SE for ML. When that happens, exclude the first word in the error from the output. (Method 2 data will be shown as N/A)
                 if err_ml1.endswith("contamination"):
                     err_ml1 = err_ml1[:-13]
             elif line.strip()[0:7] == "Method2" and line.strip()[9:16] == 'new_llh':
                 mom2=fields[3].split(":")[1]
                 err_mom2=fields[4].split(":")[1]
                 ml2=fields[5].split(":")[1]
                 err_ml2=fields[6].split(":")[1]
-        data[Ind]={ "Number_of_SNPs" : nSNPs, "Method1_MOM_estimate" : mom1, "Method1_MOM_SE" : err_mom1, "Method1_ML_estimate" : ml1, "Method1_ML_SE" : err_ml1, "Method2_MOM_estimate" : mom2, "Method2_MOM_SE" : err_mom2, "Method2_ML_estimate" : ml2, "Method2_ML_SE" : err_ml2 }
+        ## Convert estimates and errors to floating point numbers
+        (ml1, err_ml1, mom1, err_mom1, ml2, err_ml2, mom2, err_mom2) = make_float((ml1, err_ml1, mom1, err_mom1, ml2, err_ml2, mom2, err_mom2))
+        data[Ind]={ "Num_SNPs" : int(nSNPs), "Method1_MOM_estimate" : mom1, "Method1_MOM_SE" : err_mom1, "Method1_ML_estimate" : ml1, "Method1_ML_SE" : err_ml1, "Method2_MOM_estimate" : mom2, "Method2_MOM_SE" : err_mom2, "Method2_ML_estimate" : ml2, "Method2_ML_SE" : err_ml2 }
         print (Ind, nSNPs, mom1, err_mom1, ml1, err_ml1, mom2, err_mom2, ml2, err_ml2, sep="\t", file=output)
 
-with open('nuclear_contamination.json', 'w') as outfile:
-    json.dump(data, outfile)
+
+## Wrap the per-sample results in a single dict holding "plot_type", "id", "pconfig"
+## (one display title per column key used in data) and "data", then write it out as JSON.
+## This rebinds jsonOut, which was initialised as an empty OrderedDict near the top of the script, to a plain dict.
+## NOTE(review): the "generalstats" plot type and the "_mqc" file suffix presumably target MultiQC custom content -- confirm against the MultiQC documentation.
+jsonOut = {"plot_type": "generalstats", "id": "nuclear_contamination",
+    "pconfig": {
+        "Num_SNPs" : {"title" : "Number of SNPs"},
+        "Method1_MOM_estimate" : {"title": "Contamination Estimate (Method1_MOM)"},
+        "Method1_MOM_SE" : {"title": "Estimate Error (Method1_MOM)"},
+        "Method1_ML_estimate" : {"title": "Contamination Estimate (Method1_ML)"},
+        "Method1_ML_SE" : {"title": "Estimate Error (Method1_ML)"},
+        "Method2_MOM_estimate" : {"title": "Contamination Estimate (Method2_MOM)"},
+        "Method2_MOM_SE" : {"title": "Estimate Error (Method2_MOM)"},
+        "Method2_ML_estimate" : {"title": "Contamination Estimate (Method2_ML)"},
+        "Method2_ML_SE" : {"title": "Estimate Error (Method2_ML)"}
+    }, 
+    "data" : data
+}
+## Serialise the wrapped results to nuclear_contamination_mqc.json in the current working directory.
+with open('nuclear_contamination_mqc.json', 'w') as outfile:
+    json.dump(jsonOut, outfile)