nf-core
diff --git a/‎.github/markdownlint.yml‎
Lines changed: 2 additions & 0 deletions b/‎.github/markdownlint.yml‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎.github/workflows/ci.yml‎
Lines changed: 14 additions & 14 deletions b/‎.github/workflows/ci.yml‎
Lines changed: 14 additions & 14 deletions
diff --git a/‎CHANGELOG.md‎
Lines changed: 6 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 4 additions & 0 deletions b/‎README.md‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎assets/dummy.txt‎
Lines changed: 1 addition & 0 deletions b/‎assets/dummy.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎assets/dummy_postfilterflagstat.stats‎
Lines changed: 0 additions & 1 deletion b/‎assets/dummy_postfilterflagstat.stats‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎assets/multiqc_config.yaml‎
Lines changed: 0 additions & 1 deletion b/‎assets/multiqc_config.yaml‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎bin/endorS.py‎
Lines changed: 23 additions & 7 deletions b/‎bin/endorS.py‎
Lines changed: 23 additions & 7 deletions
diff --git a/‎conf/base.config‎
Lines changed: 4 additions & 4 deletions b/‎conf/base.config‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎conf/test.config‎
Lines changed: 0 additions & 3 deletions b/‎conf/test.config‎
Lines changed: 0 additions & 3 deletions
@@ -8,3 +8,5 @@ no-inline-html:
         - img
         - p
         - kbd
+        - details
+        - summary
@@ -52,13 +52,13 @@ jobs:
           git clone --single-branch --branch eager https://github.com/nf-core/test-datasets.git data
       - name: BASIC Run the basic pipeline with directly supplied single-end FASTQ
         run: |
-          nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --input 'data/testdata/Mammoth/fastq/*_R1_*.fq.gz' --single_end
+          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --input 'data/testdata/Mammoth/fastq/*_R1_*.fq.gz' --single_end
       - name: BASIC Run the basic pipeline with directly supplied paired-end FASTQ
         run: |
-          nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --input 'data/testdata/Mammoth/fastq/*_{R1,R2}_*tengrand.fq.gz'
+          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --input 'data/testdata/Mammoth/fastq/*_{R1,R2}_*tengrand.fq.gz'
       - name: BASIC Run the basic pipeline with supplied --input BAM
         run: |
-          nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --input 'data/testdata/Mammoth/bam/*_R1_*.bam' --bam --single_end
+          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --input 'data/testdata/Mammoth/bam/*_R1_*.bam' --bam --single_end
       - name: BASIC Run the basic pipeline with the test profile with, PE/SE, bwa aln
         run: |
           nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --save_reference
@@ -107,30 +107,30 @@ jobs:
       - name: BAM_FILTERING Run basic mapping pipeline with mapping quality filtering, and unmapped export
         run: |
           nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_bam_filtering --bam_mapping_quality_threshold 37 --bam_discard_unmapped --bam_unmapped_type 'fastq'
-      - name: DEDUPLICATION Test with markduplicates
+      - name: DEDUPLICATION Test with dedup
         run: |
-          nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --dedupper 'markduplicates'
+          nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --dedupper 'dedup'
       - name: GENOTYPING_HC Test running GATK HaplotypeCaller
         run: |
-         nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_fna,docker  --dedupper 'dedup' --run_genotyping --genotyping_tool 'hc' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_hc_emitrefconf 'BP_RESOLUTION'
+         nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_fna,docker --run_genotyping --genotyping_tool 'hc' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_hc_emitrefconf 'BP_RESOLUTION'
       - name: GENOTYPING_FB Test running FreeBayes
         run: |
-          nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker  --dedupper 'dedup' --run_genotyping --genotyping_tool 'freebayes'
+          nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_genotyping --genotyping_tool 'freebayes'
       - name: GENOTYPING_PC Test running pileupCaller
         run: |
-          nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_humanbam,docker --dedupper 'dedup' --run_genotyping --genotyping_tool 'pileupcaller'
+          nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_humanbam,docker --run_genotyping --genotyping_tool 'pileupcaller'
       - name: GENOTYPING_ANGSD Test running ANGSD genotype likelihood calculation
         run: |
-          nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_humanbam,docker --dedupper 'dedup' --run_genotyping --genotyping_tool 'angsd'
+          nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_humanbam,docker --run_genotyping --genotyping_tool 'angsd'
       - name: SKIPPING Test checking all skip steps work i.e. input bam, skipping straight to genotyping
         run: |
           nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_bam,docker --skip_fastqc --skip_adapterremoval --skip_deduplication --skip_qualimap --skip_preseq --skip_damage_calculation --run_genotyping --genotyping_tool 'freebayes'
       - name: TRIMBAM Test bamutils works alone
         run: |
-          nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker  --dedupper 'dedup' --run_trim_bam
+          nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_trim_bam
       - name: TRIMBAM Test PMDtools works alone
         run: |
-          nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker  --dedupper 'dedup' --run_pmdtools
+          nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_pmdtools
       - name: GATK 3.5 Download resource files
         run: |
             mkdir -p jars/gatk_3_5
@@ -140,13 +140,13 @@ jobs:
             GATK_JAR=$(readlink -f jars/gatk_3_5/GenomeAnalysisTK.jar)
       - name: GENOTYPING_UG AND MULTIVCFANALYZER Test running GATK UnifiedGenotyper and MultiVCFAnalyzer, additional VCFS
         run: |
-         nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker  --dedupper 'dedup' --run_genotyping --gatk_ug_jar '/home/runner/work/eager/eager/jars/gatk_3_5/GenomeAnalysisTK.jar' --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer --additional_vcf_files 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/testdata/Mammoth/vcf/JK2772_CATCAGTGAGTAGA_L008_R1_001.fastq.gz.tengrand.fq.combined.fq.mapped_rmdup.bam.unifiedgenotyper.vcf.gz' --write_allele_frequencies
+         nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_genotyping --gatk_ug_jar '/home/runner/work/eager/eager/jars/gatk_3_5/GenomeAnalysisTK.jar' --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer --additional_vcf_files 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/testdata/Mammoth/vcf/JK2772_CATCAGTGAGTAGA_L008_R1_001.fastq.gz.tengrand.fq.combined.fq.mapped_rmdup.bam.unifiedgenotyper.vcf.gz' --write_allele_frequencies
       - name: COMPLEX LANE/LIBRARY MERGING Test running lane and library merging prior to GATK UnifiedGenotyper and running MultiVCFAnalyzer
         run: |
-         nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_complex,docker  --dedupper 'dedup' --run_genotyping --gatk_ug_jar '/home/runner/work/eager/eager/jars/gatk_3_5/GenomeAnalysisTK.jar' --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer
+         nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_complex,docker --run_genotyping --gatk_ug_jar '/home/runner/work/eager/eager/jars/gatk_3_5/GenomeAnalysisTK.jar' --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer
       - name: GENOTYPING_UG ON TRIMMED BAM Test
         run: |
-         nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker  --dedupper 'dedup' --run_genotyping --run_trim_bam --genotyping_source 'trimmed' --gatk_ug_jar '/home/runner/work/eager/eager/jars/gatk_3_5/GenomeAnalysisTK.jar' --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP'
+         nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_genotyping --run_trim_bam --genotyping_source 'trimmed' --gatk_ug_jar '/home/runner/work/eager/eager/jars/gatk_3_5/GenomeAnalysisTK.jar' --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP'
       - name: BAM_INPUT Run the basic pipeline with the bam input profile, skip AdapterRemoval as no convertBam
         run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_bam,docker --skip_adapterremoval --run_convertinputbam
 
@@ -28,6 +28,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 * [#504] Removed sexdeterrmine-snps plot from MultiQC report.
 * Nuclear contamination results are now shown in the MultiQC report.
 * Nuclear contamination is now reported with the correct library names.
+* Tutorial on how to use profiles for reproducible science (i.e. parameter sharing between different groups)
 
 ### `Fixed`
 
@@ -43,6 +44,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 * [#444](https://github.com/nf-core/eager/issues/444) - Provide option for preserving realigned bam + index
 * Increase MultiQC process memory requirements to ensure enough memory for large runs
 * Fixed deduplication output logic. Will now pass along only the post-rmdup bams if duplicate removal is not skipped, instead of both the post-rmdup and pre-rmdup bams.
+* [#497](https://github.com/nf-core/eager/issues/497) - Simplifies number of parameters required to run bam filtering
+* [#501](https://github.com/nf-core/eager/issues/501) - Adds additional validation checks for MALT/MaltExtract database input files
+* [#508](https://github.com/nf-core/eager/issues/508) - Made MarkDuplicates the default dedupper, as dedup is only applicable in a narrower range of contexts
+* [#516](https://github.com/nf-core/eager/issues/516) - Made bedtools not report out of memory exit code when warning of inconsistent FASTA/Bed entry names
 
 ### `Dependencies`
 
@@ -60,6 +65,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 * Latest version of xopen (0.9.0)
 * Added Bowtie 2 (2.4.1)
 * Latest version of Sex.DetERRmine (1.1.2)
+* Latest version of endorS.py (0.3)
 
 ## [2.1.0] - 2020-03-05 - "Ravensburg"
 
 
@@ -45,6 +45,10 @@ By default the pipeline currently performs the following:
 
 Additional functionality contained by the pipeline currently includes:
 
+#### Input
+
+* Automatic merging of complex sequencing setups (e.g. multiple lanes, sequencing configurations, library types)
+
 #### Preprocessing
 
 * Illumina two-coloured sequencer poly-G tail removal (`fastp`)
 
@@ -0,0 +1 @@
+Empty dummy file for processes with optional input but channels still need to be filled.
@@ -93,7 +93,6 @@ top_modules:
     - 'mtnucratio'
     - 'qualimap'
     - 'sexdeterrmine'
-    - 'gatk'
     - 'multivcfanalyzer':
        path_filters:
            - '*MultiVCFAnalyzer.json'
 
@@ -23,7 +23,7 @@
    '''))
 parser.add_argument('samtoolsfiles', metavar='<samplefile>.stats', type=str, nargs='+',
                     help='output of samtools flagstat in a txt file (at least one required). If two files are supplied, the mapped reads of the second file is divided by the total reads in the first, since it assumes that the <samplefile.stats> are related to the same sample. Useful after BAM filtering')
-parser.add_argument('-v','--version', action='version', version='%(prog)s 0.2')
+parser.add_argument('-v','--version', action='version', version='%(prog)s 0.3')
 parser.add_argument('--output', '-o', nargs='?', help='specify a file format for an output file. Options: <json> for a MultiQC json output. Default: none')
 parser.add_argument('--name', '-n', nargs='?', help='specify name for the output file. Default: extracted from the first samtools flagstat file provided')
 args = parser.parse_args()
@@ -37,7 +37,14 @@
     #Extract number of mapped reads pre-quality filtering:
     mappedPre = float((re.findall(r'([0-9]+) \+ [0-9]+ mapped ',contentsPre))[0])
     #Calculation of endogenous DNA pre-quality filtering:
-    endogenousPre = float("{0:.2f}".format(round((mappedPre / totalReads * 100), 2)))
+    if totalReads == 0.0:
+        endogenousPre = 0.000000
+        print("WARNING: no reads in the fastq input, Endogenous DNA raw (%) set to 0.000000")
+    elif mappedPre == 0.0:
+        endogenousPre = 0.000000
+        print("WARNING: no mapped reads, Endogenous DNA raw (%) set to 0.000000")
+    else:
+        endogenousPre = float("{0:.6f}".format(round((mappedPre / totalReads * 100), 6)))
 except:
     print("Incorrect input, please provide at least a samtools flag stats as input\nRun:\npython endorS.py --help \nfor more information on how to run this script")
     sys.exit()
@@ -49,7 +56,14 @@
     #Extract number of mapped reads post-quality filtering:
     mappedPost = float((re.findall(r'([0-9]+) \+ [0-9]+ mapped',contentsPost))[0])
     #Calculation of endogenous DNA post-quality filtering:
-    endogenousPost = float("{0:.2f}".format(round((mappedPost / totalReads * 100),2)))
+    if totalReads == 0.0:
+        endogenousPost = 0.000000
+        print("WARNING: no reads in the fastq input, Endogenous DNA modified (%) set to 0.000000")
+    elif mappedPost == 0.0:
+        endogenousPost = 0.000000
+        print("WARNING: no mapped reads, Endogenous DNA modified (%) set to 0.000000")
+    else:
+        endogenousPost = float("{0:.6f}".format(round((mappedPost / totalReads * 100),6)))
 except:
     print("Only one samtools flagstat file provided")
     #Set the number of reads post-quality filtering to 0 if samtools
@@ -68,9 +82,10 @@
 if mappedPost == "NA":
     #Creating the json file
     jsonOutput={
+    "id": "endorS.py ",
     "plot_type": "generalstats",
     "pconfig": {
-        "endogenous_dna": { "max": 100, "min": 0, "title": "Endogenous DNA (%)"}
+        "endogenous_dna": { "max": 100, "min": 0, "title": "Endogenous DNA (%)", "format": '{:,.2f}'}
     },
     "data": {
         name : { "endogenous_dna": endogenousPre}
@@ -79,14 +94,15 @@
 else:
     #Creating the json file
     jsonOutput={
+    "id": "endorS.py ",
     "plot_type": "generalstats",
     "pconfig": {
-        "endogenous_dna": { "max": 100, "min": 0, "title": "Endogenous DNA (%)"},
-        "endogenous_dna_post": { "max": 100, "min": 0, "title": "Endogenous DNA Post (%)"}
+        "endogenous_dna": { "max": 100, "min": 0, "title": "Endogenous DNA (%)", "format": '{:,.2f}'},
+        "endogenous_dna_post": { "max": 100, "min": 0, "title": "Endogenous DNA Post (%)", "format": '{:,.2f}'}
     },
     "data": {
         name : { "endogenous_dna": endogenousPre, "endogenous_dna_post": endogenousPost}
-    }
+    },
     }
 #Checking for print to screen argument:
 if args.output is not None:
 
@@ -82,22 +82,22 @@ process {
 
   // Add 141 ignore due to unclean pipe closing by pmdtools https://github.com/pontussk/PMDtools/issues/7
   withName: pmdtools {
-    errorStrategy = { task.exitStatus in [141] ? 'ignore' : 'retry' }
+    errorStrategy = { task.exitStatus in [143,137,104,134,139,141] ? 'ignore' : 'retry' }
   }
 
   // Add 1 retry for certain java tools as not enough heap space java errors gives exit code 1
   withName: dedup {
-    errorStrategy = { task.exitStatus in [1] ? 'retry' : 'finish' } 
+    errorStrategy = { task.exitStatus in [1,143,137,104,134,139] ? 'retry' : 'finish' } 
   }
 
   // Add 1 retry as not enough heapspace java error gives exit code 1
   withName: malt {
-    errorStrategy = { task.exitStatus in [1] ? 'retry' : 'finish' } 
+    errorStrategy = { task.exitStatus in [1,143,137,104,134,139] ? 'retry' : 'finish' } 
   }
 
   // other process specific exit statuses
   withName: nuclear_contamination {
-    errorStrategy = { task.exitStatus in [134] ? 'ignore' : 'retry' }
+    errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'ignore' : 'retry' }
   }
 
   withName: multiqc {
 
@@ -17,9 +17,6 @@ params {
   genome = false
   //Input data
   single_end = false
-  readPaths = [['JK2782_TGGCCGATCAACGA_L008', ['https://github.com/nf-core/test-datasets/raw/eager/testdata/Mammoth/fastq/JK2782_TGGCCGATCAACGA_L008_R1_001.fastq.gz.tengrand.fq.gz','https://github.com/nf-core/test-datasets/raw/eager/testdata/Mammoth/fastq/JK2782_TGGCCGATCAACGA_L008_R2_001.fastq.gz.tengrand.fq.gz']],
-  ['JK2802_AGAATAACCTACCA_L008', ['https://github.com/nf-core/test-datasets/raw/eager/testdata/Mammoth/fastq/JK2802_AGAATAACCTACCA_L008_R1_001.fastq.gz.tengrand.fq.gz','https://github.com/nf-core/test-datasets/raw/eager/testdata/Mammoth/fastq/JK2802_AGAATAACCTACCA_L008_R2_001.fastq.gz.tengrand.fq.gz']],
-  ]
   // Genome references
   fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/reference/Mammoth/Mammoth_MT_Krause.fasta'
 }
-Original file line number
+Diff line change
         - img
         - p
         - kbd
 +        - details
 +        - summary
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+Empty dummy file for processes with optional input but channels still need to be filled.`
Original file line number	Diff line number	Diff line change
`@@ -82,22 +82,22 @@ process {`
`82`	`82`
`83`	`83`	`// Add 141 ignore due to unclean pipe closing by pmdtools https://github.com/pontussk/PMDtools/issues/7`
`84`	`84`	`withName: pmdtools {`
`85`		`- errorStrategy = { task.exitStatus in [141] ? 'ignore' : 'retry' }`
	`85`	`+ errorStrategy = { task.exitStatus in [143,137,104,134,139,141] ? 'ignore' : 'retry' }`
`86`	`86`	`}`
`87`	`87`
`88`	`88`	`// Add 1 retry for certain java tools as not enough heap space java errors gives exit code 1`
`89`	`89`	`withName: dedup {`
`90`		`- errorStrategy = { task.exitStatus in [1] ? 'retry' : 'finish' }`
	`90`	`+ errorStrategy = { task.exitStatus in [1,143,137,104,134,139] ? 'retry' : 'finish' }`
`91`	`91`	`}`
`92`	`92`
`93`	`93`	`// Add 1 retry as not enough heapspace java error gives exit code 1`
`94`	`94`	`withName: malt {`
`95`		`- errorStrategy = { task.exitStatus in [1] ? 'retry' : 'finish' }`
	`95`	`+ errorStrategy = { task.exitStatus in [1,143,137,104,134,139] ? 'retry' : 'finish' }`
`96`	`96`	`}`
`97`	`97`
`98`	`98`	`// other process specific exit statuses`
`99`	`99`	`withName: nuclear_contamination {`
`100`		`- errorStrategy = { task.exitStatus in [134] ? 'ignore' : 'retry' }`
	`100`	`+ errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'ignore' : 'retry' }`
`101`	`101`	`}`
`102`	`102`
`103`	`103`	`withName: multiqc {`