**`CHANGELOG.md`** (19 additions, 1 deletion)

All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).

## [Unpublished]

### `Added`

* [#80](https://github.com/nf-core/eager/pull/80) - BWA Index file handling
* [#77](https://github.com/nf-core/eager/pull/77) - Lots of documentation updates by [@jfy133](https://github.com/jfy133)

## [2.0.3] - 2018-12-09

### `Added`

* [#80](https://github.com/nf-core/eager/pull/80) - BWA Index file handling
* [#77](https://github.com/nf-core/eager/pull/77) - Lots of documentation updates by [@jfy133](https://github.com/jfy133)
* [#81](https://github.com/nf-core/eager/pull/81) - Renaming of certain BAM options
* [#92](https://github.com/nf-core/eager/issues/92) - Complete restructure of BAM options

### `Fixed`

* [#85](https://github.com/nf-core/eager/pull/85) - Fix for [Samtools index issues](https://github.com/nf-core/eager/issues/84)
* [#96](https://github.com/nf-core/eager/issues/96) - Fix for [MarkDuplicates issues](https://github.com/nf-core/eager/issues/96) found by [@nilesh-tawari](https://github.com/nilesh-tawari)

**`README.md`**

[](https://nf-core-invite.herokuapp.com)[](http://bioconda.github.io/)

**nf-core/eager** is a bioinformatics best-practice analysis pipeline for NGS sequencing based ancient DNA (aDNA) data analysis.

The pipeline uses [Nextflow](https://www.nextflow.io), a bioinformatics workflow tool. It pre-processes raw data from FASTQ inputs, aligns the reads and performs extensive general NGS and aDNA-specific quality control on the results. It comes with Docker, Singularity and Conda support, making installation trivial and results highly reproducible.

## Pipeline steps

By default the pipeline currently performs the following:

* Create reference genome indices for mapping (`bwa`, `samtools`, and `picard`)
* Sequencing quality control (`FastQC`)
* Sequencing adapter removal and, for paired-end data, merging (`AdapterRemoval`)
* Read mapping to reference genome (`bwa aln`, `bwa mem` or `CircularMapper`)
* Post-mapping processing, statistics and conversion to BAM (`samtools`)
* Ancient DNA C-to-T damage pattern visualisation (`DamageProfiler`)
* PCR duplicate removal (`DeDup` or `MarkDuplicates`)
* Post-mapping statistics and BAM quality control (`Qualimap`)
* Automatic conversion of unmapped reads to FASTQ (`samtools`)
* Damage removal/clipping for UDG+/UDG-half treatment protocols (`BamUtil`)
* Damaged reads extraction and assessment (`PMDTools`)

## Quick Start

1. Install [`nextflow`](docs/installation.md)
2. Install one of [`docker`](https://docs.docker.com/engine/installation/), [`singularity`](https://www.sylabs.io/guides/3.0/user-guide/) or [`conda`](https://conda.io/miniconda.html)
3. Download the EAGER pipeline

```bash
nextflow pull nf-core/eager
```

4. Set up your job with default parameters

```bash
nextflow run nf-core/eager -profile <docker/singularity/conda> --reads '*_R{1,2}.fastq.gz' --fasta '<REFERENCE>.fasta'
```

5. See the overview of the run under `<OUTPUT_DIR>/MultiQC/multiqc_report.html`

Modifications to the default pipeline are easily made using various options as described in the documentation.
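
A note on the `--reads` pattern: the `{1,2}` glob is what pairs forward and reverse files per sample. Nextflow expands the pattern itself, but bash brace expansion behaves analogously, so you can preview the matches (sample names below are made up for illustration):

```bash
# Create dummy paired-end files and preview what the glob matches
mkdir -p reads_demo
touch reads_demo/sampleA_R1.fastq.gz reads_demo/sampleA_R2.fastq.gz
touch reads_demo/sampleB_R1.fastq.gz reads_demo/sampleB_R2.fastq.gz
ls reads_demo/*_R{1,2}.fastq.gz
```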

## Documentation

The nf-core/eager pipeline comes with documentation about the pipeline, found in the `docs/` directory:

1. [Installation](docs/installation.md)
4. [Output and how to interpret the results](docs/output.md)
5. [Troubleshooting](docs/troubleshooting.md)

## Credits

This pipeline was written by Alexander Peltzer ([apeltzer](https://github.com/apeltzer)), with major contributions from Stephen Clayton, ideas and documentation from James Fellows Yates, Raphael Eisenhofer and Judith Neukamm. If you want to contribute, please open an issue and ask to be added to the project - happy to do so and everyone is welcome to contribute here!

## Tool References

* **EAGER v1**, **CircularMapper**, **DeDup**: Peltzer, A., Jäger, G., Herbig, A., Seitz, A., Kniep, C., Krause, J., & Nieselt, K. (2016). EAGER: efficient ancient genome reconstruction. Genome Biology, 17(1), 1–14. [https://doi.org/10.1186/s13059-016-0918-z](https://doi.org/10.1186/s13059-016-0918-z) Download: [https://github.com/apeltzer/EAGER-GUI](https://github.com/apeltzer/EAGER-GUI) and [https://github.com/apeltzer/EAGER-CLI](https://github.com/apeltzer/EAGER-CLI)
* **AdapterRemoval v2**: Schubert, M., Lindgreen, S., & Orlando, L. (2016). AdapterRemoval v2: rapid adapter trimming, identification, and read merging. BMC Research Notes, 9, 88. [https://doi.org/10.1186/s13104-016-1900-2](https://doi.org/10.1186/s13104-016-1900-2) Download: [https://github.com/MikkelSchubert/adapterremoval](https://github.com/MikkelSchubert/adapterremoval)
* **bwa**: Li, H., & Durbin, R. (2009). Fast and accurate short read alignment with Burrows-Wheeler transform. Bioinformatics, 25(14), 1754–1760. [https://doi.org/10.1093/bioinformatics/btp324](https://doi.org/10.1093/bioinformatics/btp324) Download: [http://bio-bwa.sourceforge.net/bwa.shtml](http://bio-bwa.sourceforge.net/bwa.shtml)
* **SAMtools**: Li, H., Handsaker, B., Wysoker, A., Fennell, T., Ruan, J., Homer, N., … 1000 Genome Project Data Processing Subgroup. (2009). The Sequence Alignment/Map format and SAMtools. Bioinformatics, 25(16), 2078–2079. [https://doi.org/10.1093/bioinformatics/btp352](https://doi.org/10.1093/bioinformatics/btp352) Download: [http://www.htslib.org/](http://www.htslib.org/)
* **DamageProfiler**: Judith Neukamm (Unpublished)
* **QualiMap**: Okonechnikov, K., Conesa, A., & García-Alcalde, F. (2016). Qualimap 2: advanced multi-sample quality control for high-throughput sequencing data. Bioinformatics, 32(2), 292–294. [https://doi.org/10.1093/bioinformatics/btv566](https://doi.org/10.1093/bioinformatics/btv566) Download: [http://qualimap.bioinfo.cipf.es/](http://qualimap.bioinfo.cipf.es/)
* **preseq**: Daley, T., & Smith, A. D. (2013). Predicting the molecular complexity of sequencing libraries. Nature Methods, 10(4), 325–327. [https://doi.org/10.1038/nmeth.2375](https://doi.org/10.1038/nmeth.2375) Download: [http://smithlabresearch.org/software/preseq/](http://smithlabresearch.org/software/preseq/)
* **PMDTools**: Skoglund, P., Northoff, B. H., Shunkov, M. V., Derevianko, A. P., Pääbo, S., Krause, J., & Jakobsson, M. (2014). Separating endogenous ancient DNA from modern day contamination in a Siberian Neandertal. Proceedings of the National Academy of Sciences of the United States of America, 111(6), 2229–2234. [https://doi.org/10.1073/pnas.1318934111](https://doi.org/10.1073/pnas.1318934111) Download: [https://github.com/pontussk/PMDtools](https://github.com/pontussk/PMDtools)
* **MultiQC**: Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics, 32(19), 3047–3048. [https://doi.org/10.1093/bioinformatics/btw354](https://doi.org/10.1093/bioinformatics/btw354) Download: [https://multiqc.info/](https://multiqc.info/)
* **BamUtil**: Jun, G., Wing, M. K., Abecasis, G. R., & Kang, H. M. (2015). An efficient and scalable analysis framework for variant extraction and refinement from population-scale DNA sequence data. Genome Research, 25(6), 918–925. [https://doi.org/10.1101/gr.176552.114](https://doi.org/10.1101/gr.176552.114) Download: [https://genome.sph.umich.edu/wiki/BamUtil](https://genome.sph.umich.edu/wiki/BamUtil)

**`docs/configuration/adding_your_own.md`** (16 additions, 3 deletions)
## Software Requirements

To run the pipeline, several software packages are required. How you satisfy these requirements is essentially up to you and depends on your system. If possible, we _highly_ recommend using either Docker or Singularity.

Please see the [`installation documentation`](../installation.md) for how to run using the below as a one-off. These instructions are about configuring a config file for repeated use.

Note that the dockerhub organisation name annoyingly can't have a hyphen, so is

### Singularity image

Many HPC environments are not able to run Docker due to security issues. [Singularity](http://singularity.lbl.gov/) is a tool designed to run on such HPC systems which is very similar to Docker.

To specify singularity usage in your pipeline config file, add the following:
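
The snippet itself is not shown in this excerpt; a minimal sketch of the standard Nextflow form is below (check the original file for the exact block):

```nextflow
singularity {
  enabled = true
}
```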

To use conda in your own config file, add the following:

```nextflow
process.conda = "$baseDir/environment.yml"
```

## Job Resources

### Automatic resubmission

Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with an error code of `143` (exceeded requested resources) it will automatically resubmit with higher requests (2 x original, then 3 x original). If it still fails after three times then the pipeline is stopped.
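
In Nextflow terms, this behaviour can be sketched with an error strategy and attempt-scaled resource requests. The values below are illustrative, not the pipeline's actual defaults:

```nextflow
process {
  // exit status 143 means the job was killed for exceeding its requested resources
  errorStrategy = { task.exitStatus == 143 ? 'retry' : 'finish' }
  maxRetries = 2  // i.e. up to three attempts in total
  // each retry scales the original request: 1x, 2x, 3x
  memory = { 8.GB * task.attempt }
  time = { 4.h * task.attempt }
}
```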

### Custom resource requests

Wherever process-specific requirements are set in the pipeline, the default value can be changed by creating a custom config file. See the files in [`conf`](../conf) for examples.
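
For example, to raise the resources for a single process in your own config file (the process name `markduplicates` is illustrative; see the pipeline source for the real process names):

```nextflow
// Custom config: override defaults for one named process
process {
  withName: markduplicates {
    memory = 16.GB
    cpus = 4
  }
}
```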

### AWS Batch specific parameters

Running the pipeline on AWS Batch requires a couple of specific parameters to be set according to your AWS Batch configuration. Please use the `-awsbatch` profile and then specify all of the following parameters.

#### `--awsqueue`

The JobQueue that you intend to use on AWS Batch.

#### `--awsregion`

The AWS region to run your job in. Default is set to `eu-west-1` but can be adjusted to your needs.
Please make sure to also set the `-w/--work-dir` and `--outdir` parameters to an S3 storage bucket of your choice - you'll get an error message notifying you if you didn't.
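
Putting the AWS Batch options together, an invocation might look like the following (queue, region and bucket names are placeholders):

```bash
nextflow run nf-core/eager -profile awsbatch \
    --awsqueue my-batch-queue \
    --awsregion eu-west-1 \
    -w s3://my-bucket/work \
    --outdir s3://my-bucket/results \
    --reads 's3://my-bucket/reads/*_R{1,2}.fastq.gz' \
    --fasta 's3://my-bucket/reference/genome.fasta'
```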