diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index a2f10786e..fe79bfed3 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -6,7 +6,9 @@ We try to manage the required tasks for nf-core/eager using GitHub issues, you p However, don't be put off by this template - other more general issues and suggestions are welcome! Contributions to the code are even more welcome ;) -> If you need help using or modifying nf-core/eager then the best place to go is the Gitter chatroom where you can ask us questions directly: https://gitter.im/nf-core/Lobby +> If you need help using or modifying nf-core/eager then the best place to ask is on the pipeline channel on [Slack](https://nf-core-invite.herokuapp.com/). + + ## Contribution workflow If you'd like to write some code for nf-core/eager, the standard workflow @@ -30,7 +32,7 @@ Typically, pull-requests are only fully reviewed when these tests are passing, t There are typically two types of tests that run: ### Lint Tests -The nf-core has a [set of guidelines](http://nf-co.re/developer_docs) which all pipelines must adhere to. +The nf-core has a [set of guidelines](http://nf-co.re/guidelines) which all pipelines must adhere to. To enforce these and ensure that all pipelines stay in sync, we have developed a helper tool which runs checks on the pipeline code. This is in the [nf-core/tools repository](https://github.com/nf-core/tools) and once installed can be run locally with the `nf-core lint ` command. If any failures or warnings are encountered, please follow the listed URL for more documentation. @@ -42,4 +44,4 @@ If there are any failures then the automated tests fail. These tests are run both with the latest available version of Nextflow and also the minimum required version that is stated in the pipeline code. 
## Getting help -For further information/help, please consult the [nf-core/eager documentation](https://github.com/nf-core/eager#documentation) and don't hesitate to get in touch on [Gitter](https://gitter.im/nf-core/Lobby) +For further information/help, please consult the [nf-core/eager documentation](https://github.com/nf-core/eager#documentation) and don't hesitate to get in touch on the pipeline channel on [Slack](https://nf-core-invite.herokuapp.com/). diff --git a/.github/markdownlint.yml b/.github/markdownlint.yml new file mode 100644 index 000000000..e052a635a --- /dev/null +++ b/.github/markdownlint.yml @@ -0,0 +1,9 @@ +# Markdownlint configuration file +default: true +line-length: false +no-multiple-blanks: 0 +blanks-around-headers: false +blanks-around-lists: false +header-increment: false +no-duplicate-header: + siblings_only: true diff --git a/.gitignore b/.gitignore index 46f69e414..5b54e3e6c 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ data/ results/ .DS_Store tests/test_data +*.pyc diff --git a/.travis.yml b/.travis.yml index 82b3f9627..83a7bfdea 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,6 +13,7 @@ before_install: # Pull the docker image first so the test doesn't wait for this - docker pull nfcore/eager:dev # Fake the tag locally so that the pipeline runs properly + # Looks weird when this is :dev to :dev, but makes sense when testing code for a release (:dev to :1.0.1) - docker tag nfcore/eager:dev nfcore/eager:dev install: @@ -21,6 +22,7 @@ install: - wget -qO- get.nextflow.io | bash - sudo ln -s /tmp/nextflow/nextflow /usr/local/bin/nextflow # Install nf-core/tools + - pip install --upgrade pip - pip install nf-core # Install Conda - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh @@ -28,6 +30,8 @@ install: - export PATH="$HOME/miniconda/bin:$PATH" # Reset - mkdir ${TRAVIS_BUILD_DIR}/tests && cd ${TRAVIS_BUILD_DIR}/tests + # Install markdownlint-cli + - sudo apt-get install npm && npm install -g 
markdownlint-cli env: - NXF_VER='0.32.0' # Specify a minimum NF version that should be tested and work diff --git a/CHANGELOG.md b/CHANGELOG.md index 2295d2989..4901be67c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. * [#186](https://github.com/nf-core/eager/pull/186) - Make FastQC skipping [possible] /(https://github.com/nf-core/eager/issues/182) +* Merged in [nf-core/tools](https://github.com/nf-core/tools) release V1.6 template changes ### `Fixed` * [#172](https://github.com/nf-core/eager/pull/152) - DamageProfiler errors [won't crash entire pipeline anymore](https://github.com/nf-core/eager/issues/171) @@ -19,6 +20,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. * Added DeDup v0.12.4 (json support) * Added mtnucratio v0.5 (json support) +* Updated Picard 2.18.27 -> 2.19.0 +* Updated GATK 4.1.0.0 -> 4.1.1.1 +* Updated damageprofiler 0.4.4 -> 0.4.5 +* Updated r-rmarkdown 1.11 -> 1.12 ## [2.0.6] - 2019-03-05 diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 21096193a..09226d0d8 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -34,7 +34,7 @@ This Code of Conduct applies both within project spaces and in public spaces whe ## Enforcement -Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team on the [Gitter channel](https://gitter.im/nf-core/Lobby). The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. +Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team on [Slack](https://nf-core-invite.herokuapp.com/). 
The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. diff --git a/LICENSE b/LICENSE index 176423c26..904d4f8c9 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2018 Alexander Peltzer +Copyright (c) Alexander Peltzer, Stephen Clayton, James A. Fellows Yates, Maxime Borry Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index df99db945..f4c7bbaf5 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,14 @@ # ![nf-core/eager](docs/images/eager_logo.png) -[![Build Status](https://travis-ci.org/nf-core/eager.svg?branch=master)](https://travis-ci.org/nf-core/eager) +**A fully reproducible ancient and modern DNA pipeline in Nextflow and with cloud support.**
+ +[![Build Status](https://travis-ci.com/nf-core/eager.svg?branch=master)](https://travis-ci.com/nf-core/eager) [![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A50.32.0-brightgreen.svg)](https://www.nextflow.io/) [![Slack Status](https://nf-core-invite.herokuapp.com/badge.svg)](https://nf-core-invite.herokuapp.com)[![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg)](http://bioconda.github.io/) [![Docker Container available](https://img.shields.io/docker/automated/nfcore/eager.svg)](https://hub.docker.com/r/nfcore/eager/) ![Singularity Container available](https://img.shields.io/badge/singularity-available-7E4C74.svg) [![DOI](https://zenodo.org/badge/135918251.svg)](https://zenodo.org/badge/latestdoi/135918251) - - ## Introduction **nf-core/eager** is a bioinformatics best-practice analysis pipeline for NGS @@ -44,7 +44,7 @@ Additional functionality contained by the pipeline currently includes: ## Quick Start -1. Install [`nextflow`](docs/installation.md) +1. Install [`nextflow`](https://nf-co.re/usage/installation) 2. Install one of [`docker`](https://docs.docker.com/engine/installation/), [`singularity`](https://www.sylabs.io/guides/3.0/user-guide/) or [`conda`](https://conda.io/miniconda.html) @@ -73,22 +73,24 @@ as described in the documentation. ## Documentation -The nf-core/eager pipeline comes with documentation about the pipeline, found in the `docs/` directory: +The nf-core/eager pipeline comes with documentation about the pipeline, found in the `docs/` directory or on the main homepage of the nf-core project: -1. [Installation](docs/installation.md) +1. [Nextflow Installation](https://nf-co.re/usage/installation) 2. 
Pipeline configuration - * [Local installation](docs/configuration/local.md) - * [Adding your own system](docs/configuration/adding_your_own.md) + * [Pipeline installation](https://nf-co.re/usage/local_installation) + * [Adding your own system config](https://nf-co.re/usage/adding_own_config) + * [Reference genomes](https://nf-co.re/usage/reference_genomes) 3. [Running the pipeline](docs/usage.md) 4. [Output and how to interpret the results](docs/output.md) -5. [Troubleshooting](docs/troubleshooting.md) +5. [Troubleshooting](https://nf-co.re/usage/troubleshooting) +6. [EAGER Troubleshooting](docs/troubleshooting.md) ## Credits This pipeline was written by Alexander Peltzer ([apeltzer](https://github.com/apeltzer)), with major contributions from Stephen Clayton, ideas and documentation from -James Fellows Yates, Raphael Eisenhofer and Judith Neukamm. If you want to +James A. Fellows Yates, Raphael Eisenhofer, Maxime Borry and Judith Neukamm. If you want to contribute, please open an issue and ask to be added to the project - happy to do so and everyone is welcome to contribute here! 
diff --git a/Singularity b/Singularity deleted file mode 100644 index e4123fa14..000000000 --- a/Singularity +++ /dev/null @@ -1,18 +0,0 @@ -From:nfcore/base -Bootstrap:docker - -%labels - MAINTAINER Alexander Peltzer - DESCRIPTION Container image containing all requirements for the nf-core/eager pipeline - VERSION 2.0.7dev - -%environment - PATH=/opt/conda/envs/nf-core-eager-2.0.7dev/bin:$PATH - export PATH - -%files - environment.yml / - -%post - /opt/conda/bin/conda env create -f /environment.yml - /opt/conda/bin/conda clean -a diff --git a/assets/email_template.html b/assets/email_template.html index 4803eddc3..bb88fe4e1 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -5,7 +5,7 @@ - + nf-core/eager Pipeline Report diff --git a/conf/multiqc_config.yaml b/assets/multiqc_config.yaml similarity index 100% rename from conf/multiqc_config.yaml rename to assets/multiqc_config.yaml diff --git a/assets/sendmail_template.txt b/assets/sendmail_template.txt index fd1cd7396..2d6712200 100644 --- a/assets/sendmail_template.txt +++ b/assets/sendmail_template.txt @@ -1,11 +1,36 @@ To: $email Subject: $subject Mime-Version: 1.0 -Content-Type: multipart/related;boundary="nfmimeboundary" +Content-Type: multipart/related;boundary="nfcoremimeboundary" ---nfmimeboundary +--nfcoremimeboundary Content-Type: text/html; charset=utf-8 $email_html ---nfmimeboundary-- +<% +if (mqcFile){ +def mqcFileObj = new File("$mqcFile") +if (mqcFileObj.length() < mqcMaxSize){ +out << """ +--nfcoremimeboundary +Content-Type: text/html; name=\"multiqc_report\" +Content-Transfer-Encoding: base64 +Content-ID: +Content-Disposition: attachment; filename=\"${mqcFileObj.getName()}\" + +${mqcFileObj. + bytes. + encodeBase64(). + toString(). + tokenize( '\n' )*. + toList()*. + collate( 76 )*. + collect { it.join() }. + flatten(). 
+ join( '\n' )} +""" +}} +%> + +--nfcoremimeboundary-- diff --git a/bin/__pycache__/scrape_software_versions.cpython-36.pyc b/bin/__pycache__/scrape_software_versions.cpython-36.pyc index dc8b3944f..ebd68ae9b 100644 Binary files a/bin/__pycache__/scrape_software_versions.cpython-36.pyc and b/bin/__pycache__/scrape_software_versions.cpython-36.pyc differ diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py index 0fb028c1f..0f709a2b1 100755 --- a/bin/scrape_software_versions.py +++ b/bin/scrape_software_versions.py @@ -41,9 +41,14 @@ if match: results[k] = "v{}".format(match.group(1)) +# Remove software set to false in results (iterate over a snapshot of the keys: deleting from a dict while iterating it raises RuntimeError on Python 3) +for k in list(results): + if not results[k]: + del(results[k]) + # Dump to YAML print (''' -id: 'nf-core/eager-software-versions' +id: 'software_versions' section_name: 'nf-core/eager Software Versions' section_href: 'https://github.com/nf-core/eager' plot_type: 'html' @@ -52,5 +57,10 @@
''') for k,v in results.items(): - print("
{}
{}
".format(k,v)) + print("
{}
{}
".format(k,v)) print ("
") + +# Write out software names and versions as a tab-separated file: +with open('software_versions.csv', 'w') as f: + for k,v in results.items(): + f.write("{}\t{}\n".format(k,v)) diff --git a/conf/awsbatch.config b/conf/awsbatch.config index 79078c7bd..14af5866f 100644 --- a/conf/awsbatch.config +++ b/conf/awsbatch.config @@ -1,10 +1,15 @@ /* * ------------------------------------------------- - * Nextflow config file for AWS Batch + * Nextflow config file for running on AWS batch * ------------------------------------------------- - * Imported under the 'awsbatch' Nextflow profile in nextflow.config - * Uses docker for software depedencies automagically, so not specified here. + * Base config needed for running with -profile awsbatch */ +params { + config_profile_name = 'AWSBATCH' + config_profile_description = 'AWSBATCH Cloud Profile' + config_profile_contact = 'Alexander Peltzer (@apeltzer)' + config_profile_url = 'https://aws.amazon.com/de/batch/' +} aws.region = params.awsregion process.executor = 'awsbatch' diff --git a/conf/base.config b/conf/base.config index 0ca31ea23..d72dde862 100644 --- a/conf/base.config +++ b/conf/base.config @@ -14,7 +14,7 @@ process { memory = { check_max( 8.GB * task.attempt, 'memory' ) } time = { check_max( 2.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'finish' } + errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 
'retry' : 'finish' } maxRetries = 1 maxErrors = '-1' diff --git a/conf/test.config b/conf/test.config index 143f4b227..af4449f71 100644 --- a/conf/test.config +++ b/conf/test.config @@ -8,6 +8,9 @@ */ params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + // Limit resources so that this can run on Travis max_cpus = 2 max_memory = 6.GB max_time = 48.h diff --git a/docs/README.md b/docs/README.md index f551fb53d..480e99ef8 100644 --- a/docs/README.md +++ b/docs/README.md @@ -2,10 +2,11 @@ The nf-core/eager documentation is split into the following files: -1. [Installation](installation.md) -2. [Running the pipeline](usage.md) -3. Pipeline configuration - * [Adding your own system](configuration/adding_your_own.md) - * [Reference genomes](configuration/reference_genomes.md) +1. [Installation](https://nf-co.re/usage/installation) +2. Pipeline configuration + * [Local installation](https://nf-co.re/usage/local_installation) + * [Adding your own system config](https://nf-co.re/usage/adding_own_config) + * [Reference genomes](https://nf-co.re/usage/reference_genomes) +3. [Running the pipeline](usage.md) 4. [Output and how to interpret the results](output.md) -5. [Troubleshooting](troubleshooting.md) +5. [Troubleshooting](https://nf-co.re/usage/troubleshooting) diff --git a/docs/configuration/adding_your_own.md b/docs/configuration/adding_your_own.md deleted file mode 100644 index 7793f2f80..000000000 --- a/docs/configuration/adding_your_own.md +++ /dev/null @@ -1,130 +0,0 @@ -# nf-core/eager: Configuration for other clusters - -## Introduction - -It is entirely possible to run this pipeline on your own clusters, though you will need to set up your own config file so that the pipeline knows how to work with your cluster. 
- -### Personal Profiles - -If you are the only person to be running this pipeline, you can create your config file as `~/.nextflow/config` and it will be applied every time you run Nextflow. Alternatively, save the file anywhere and reference it when running the pipeline with `-c path/to/config` (see the [Nextflow documentation](https://www.nextflow.io/docs/latest/config.html) for more). - -A basic configuration comes with the pipeline, which runs by default (the `standard` config profile - see [`conf/base.config`](../conf/base.config)). This means that you only need to configure the specifics for your system and overwrite any defaults that you want to change. - -### Institute Profiles - -In contrast, if you think that there are other people using the pipeline who would benefit from your configuration (e.g. other common cluster setups), you can create a config adapted to that cluster and is centrally stored and maintained at [nf-core/configs](https://github.com/nf-core/configs). Then, you can specify `-profile ` when running the pipeline without making your own custom config file. Furthermore, the same profile can be used for other nf-core pipelines. - -## Creating your own profile - -### Cluster Environment -By default, pipeline uses the `local` Nextflow executor - in other words, all jobs are run in the login session. If you're using a simple server, this may be fine. If you're using a compute cluster, this is bad as all jobs will run on the head node. - -To specify your cluster environment, add the following line to your config file: - -```nextflow -process.executor = 'YOUR_SYSTEM_TYPE' -``` - -Many different cluster types are supported by Nextflow. For more information, please see the [Nextflow documentation](https://www.nextflow.io/docs/latest/executor.html). - -Note that you may need to specify cluster options, such as a project or queue. 
To do so, use the `clusterOptions` config option: - -```nextflow -process { - executor = 'SLURM' - clusterOptions = '-A myproject' -} -``` -### Software Requirements -To run the pipeline, several software packages are required. How you satisfy these requirements is essentially up to you and depends on your system. If possible, we _highly_ recommend using either Docker or Singularity. -Please see the [`installation documentation`](../installation.md) for how to run using the below as a one-off. These instructions are about configuring a config file for repeated use. - -#### Docker -Docker is a great way to run nf-core/eager, as it manages all software installations and allows the pipeline to be run in an identical software environment across a range of systems. - -Nextflow has [excellent integration](https://www.nextflow.io/docs/latest/docker.html) with Docker, and beyond installing the two tools, not much else is required - nextflow will automatically fetch the [nfcore/eager](https://hub.docker.com/r/nfcore/eager/) image that we have created and is hosted at dockerhub at run time. - -To add docker support to your own config file, add the following: - -```nextflow -docker.enabled = true -process.container = "nfcore/eager" -``` - -Note that the dockerhub organisation name annoyingly can't have a hyphen, so is `nfcore` and not `nf-core`. - - -#### Singularity image -Many HPC environments are not able to run Docker due to security issues. -[Singularity](http://singularity.lbl.gov/) is a tool designed to run on such HPC systems which is very similar to Docker. - -To specify singularity usage in your pipeline config file, add the following: - -```nextflow -singularity.enabled = true -process.container = "shub://nf-core/eager" -``` -If you intend to run the pipeline offline, nextflow will not be able to automatically download the singularity image for you. -Instead, you'll have to do this yourself manually first, transfer the image file and then point to that. 
- -First, pull the image file where you have an internet connection: - -```bash -singularity pull --name nf-core-eager.simg shub://nf-core/eager -``` - -Then transfer this file and point the config file to the image: - -```nextflow -singularity.enabled = true -process.container = "/path/to/nf-core-eager.simg" -``` - -By default nextflow will store a singularity image in the working directory of a job. You can alternatively further specify a 'central' singularity cache to keep all singularity contains for a(ll) user(s). This can be -done by either setting a central environmental variable `NXF_SINGULARITY_CACHEDIR` or specifying the location in a nextflow config file with `singularity.cacheDir`. - -#### Conda -If you're not able to use Docker or Singularity, you can instead use conda to manage the software requirements. -To use conda in your own config file, add the following: - -```nextflow -process.conda = "$baseDir/environment.yml" -``` - -### Software Caches - -Each new version of a pipeline downloaded and ran, will pull down a new image (docker/singularity)/collection (conda) of all the software required for the pipeline. By default this will be placed in the `work/` directory of an EAGER run. When running lots of pipeline jobs, this can slow down the pipeline (having to create a download a new environment each time) and take up a lot of hard-disk space (as each run has it's own duplicate of the environment). - -You can specify a central location for this using the `cacheDir` parameter [(see nextflow documentation)](https://www.nextflow.io/docs/latest/config.html). This can either be central for all users e.g. - -``` -singularity { - enabled = TRUE - cacheDir = '///' -} -``` - -Or if you give freedom to users as to which version they use - -``` -conda { - cacheDir = "///$USER/" -} -``` - -### Job Resources - -#### Automatic resubmission -Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. 
For most of the steps in the pipeline, if the job exits with an error code of `143` (exceeded requested resources) it will automatically resubmit with higher requests (2 x original, then 3 x original). If it still fails after three times then the pipeline is stopped. - -#### Custom resource requests -Wherever process-specific requirements are set in the pipeline, the default value can be changed by creating a custom config file. See the files in [`conf`](../conf) for examples. - -### AWS Batch specific parameters -Running the pipeline on AWS Batch requires a couple of specific parameters to be set according to your AWS Batch configuration. Please use the `-awsbatch` profile and then specify all of the following parameters. -#### `--awsqueue` -The JobQueue that you intend to use on AWS Batch. -#### `--awsregion` -The AWS region to run your job in. Default is set to `eu-west-1` but can be adjusted to your needs. - -Please make sure to also set the `-w/--work-dir` and `--outdir` parameters to a S3 storage bucket of your choice - you'll get an error message notifying you if you didn't. diff --git a/docs/configuration/reference_genomes.md b/docs/configuration/reference_genomes.md deleted file mode 100644 index 324cfef07..000000000 --- a/docs/configuration/reference_genomes.md +++ /dev/null @@ -1,50 +0,0 @@ -# nf-core/eager: Reference Genomes Configuration - -The nf-core/eager pipeline needs a reference genome for alignment and annotation. - -These paths can be supplied on the command line at run time (see the [usage docs](../usage.md)), -but for convenience it's often better to save these paths in a nextflow config file. -See below for instructions on how to do this. -Read [Adding your own system](adding_your_own.md) to find out how to set up custom config files. - -## Adding paths to a config file -Specifying long paths every time you run the pipeline is a pain. 
-To make this easier, the pipeline comes configured to understand reference genome keywords which correspond to preconfigured paths, meaning that you can just specify `--genome ID` when running the pipeline. - -Note that this genome key can also be specified in a config file if you always use the same genome. - -To use this system, add paths to your config file using the following template: - -```nextflow -params { - genomes { - 'YOUR-ID' { - fasta = '/genome.fa' - } - 'OTHER-GENOME' { - // [..] - } - } - // Optional - default genome. Ignored if --genome 'OTHER-GENOME' specified on command line - genome = 'YOUR-ID' -} -``` - -You can add as many genomes as you like as long as they have unique IDs. - -## illumina iGenomes - -To make the use of reference genomes easier, illumina has developed a centralised resource called [iGenomes](https://support.illumina.com/sequencing/sequencing_software/igenome.html). -Multiple reference index types are held together with consistent structure for multiple genomes. - -We have put a copy of iGenomes up onto AWS S3 hosting and this pipeline is configured to use this by default. -The hosting fees for AWS iGenomes are currently kindly funded by a grant from Amazon. -The pipeline will automatically download the required reference files when you run the pipeline. -For more information about the AWS iGenomes, see https://ewels.github.io/AWS-iGenomes/ - -Downloading the files takes time and bandwidth, so we recommend making a local copy of the iGenomes resource. -Once downloaded, you can customise the variable `params.igenomes_base` in your custom configuration file to point to the reference location. 
-For example: -```nextflow -params.igenomes_base = '/path/to/data/igenomes/' -``` diff --git a/docs/installation.md b/docs/installation.md deleted file mode 100644 index 2815cfd8b..000000000 --- a/docs/installation.md +++ /dev/null @@ -1,136 +0,0 @@ -# nf-core/eager: Installation - -To start using the nf-core/eager pipeline, follow the steps below: - -1. [Install Nextflow](#1-install-nextflow) -2. [Install the pipeline](#2-install-the-pipeline) - * [Automatic](#21-automatic) - * [Offline](#22-offline) - * [Development](#23-development) -3. [Pipeline configuration](#3-pipeline-configuration) - * [Software deps: Docker and Singularity](#31-software-deps-docker-and-singularity) - * [Software deps: Bioconda](#32-software-deps-bioconda) -4. [Terminal configuration](#4-terminal-configuration) -5. [Appendices](#appendices) - * [Running on UPPMAX](#running-on-uppmax) - -## 1) Install NextFlow -Nextflow runs on most POSIX systems (Linux, Mac OSX etc). It can be installed by running the following commands: - -```bash -# Make sure that Java v8+ is installed: -java -version - -# Install Nextflow -curl -fsSL get.nextflow.io | bash - -# Add the Nextflow binary to your PATH: -mv nextflow ~/bin/ -# OR system-wide installation: -# sudo mv nextflow /usr/local/bin -``` -See [nextflow.io](https://www.nextflow.io/) for further instructions on how to install and configure Nextflow. - -## 2) Install the pipeline - -### 2.1) Automatic -This pipeline itself needs no installation - NextFlow will automatically fetch it from GitHub if `nf-core/eager` is specified as the pipeline name. - -### 2.2) Offline -The above method requires an internet connection so that Nextflow can download the pipeline files. 
If you're running on a system that has no internet connection, you'll need to download and transfer the pipeline files manually: - -```bash -wget https://github.com/nf-core/eager/archive/master.zip -mkdir -p ~/my-pipelines/nf-core/ -unzip master.zip -d ~/my-pipelines/nf-core/ -cd ~/my_data/ -nextflow run ~/my-pipelines/nf-core/eager-master -``` - -To stop nextflow from looking for updates online, you can tell it to run in offline mode by specifying the following environment variable in your ~/.bashrc file: - -```bash -export NXF_OFFLINE='TRUE' -``` - -### 2.3) Development - -If you would like to make changes to the pipeline, it's best to make a fork on GitHub and then clone the files. Once cloned you can run the pipeline directly as above. - - -## 3) Pipeline configuration -By default, the pipeline runs with the `standard` configuration profile. This uses a number of sensible defaults for process requirements and is suitable for running on a simple (if powerful!) basic server. You can see this configuration in [`conf/base.config`](../conf/base.config). - -Be warned of two important points about this default configuration: - -1. The default profile uses the `local` executor - * All jobs are run in the login session. If you're using a simple server, this may be fine. If you're using a compute cluster, this is bad as all jobs will run on the head node. - * See the [nextflow docs](https://www.nextflow.io/docs/latest/executor.html) for information about running with other hardware backends. Most job scheduler systems are natively supported. -2. 
Nextflow will expect all software to be installed and available on the `PATH` - -The following software is currently required to be installed: - -* [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) -* [Picard Tools](https://broadinstitute.github.io/picard/) -* [Samtools](http://www.htslib.org/) -* [Preseq](http://smithlabresearch.org/software/preseq/) -* [MultiQC](https://multiqc.info/) -* [BWA](http://bio-bwa.sourceforge.net/) -* [Qualimap](http://qualimap.bioinfo.cipf.es/) -* [GATK](https://software.broadinstitute.org/gatk/) -* [bamUtil](https://genome.sph.umich.edu/wiki/BamUtil) -* [fastP](https://github.com/OpenGene/fastp) -* [DamageProfiler](https://github.com/Integrative-Transcriptomics/DamageProfiler) - - -### 3.1) Software deps: Docker -First, install docker on your system: [Docker Installation Instructions](https://docs.docker.com/engine/installation/) - -Then, running the pipeline with the option `-profile standard,docker` tells Nextflow to enable Docker for this run. An image containing all of the software requirements will be automatically fetched and used from dockerhub (https://hub.docker.com/r/nfcore/eager). - -### 3.2) Software deps: Singularity -If you're not able to use Docker then [Singularity](http://sylabs.io) is a great alternative. -The process is very similar: running the pipeline with the option `-profile standard,singularity` tells Nextflow to enable singularity for this run. An image containing all of the software requirements will be automatically fetched and used from singularity hub. 
- -If running offline with Singularity, you'll need to download and transfer the Singularity image first: - -```bash -singularity pull --name nf-core-eager.simg shub://nf-core/eager -``` - -Once transferred, use `-with-singularity` and specify the path to the image file: - -```bash -nextflow run /path/to/nf-core-eager -with-singularity nf-core-eager.simg -``` - -Remember to pull updated versions of the singularity image if you update the pipeline. - - -### 3.3) Software deps: conda -If you're not able to use Docker _or_ Singularity, you can instead use conda to manage the software requirements. -This is slower and less reproducible than the above, but is still better than having to install all requirements yourself! -The pipeline ships with a conda environment file and nextflow has built-in support for this. -To use it first ensure that you have conda installed (we recommend [miniconda](https://conda.io/miniconda.html)), then follow the same pattern as above and use the flag `-profile standard,conda` - -## 4) Terminal configuration -Nextflow handles job submissions on SLURM or other environments, and supervises running the jobs. Thus the Nextflow process must run until the pipeline is finished. We recommend that you put the process running in the background through `screen` / `tmux` or similar tool. Alternatively you can run nextflow within a cluster job submitted your job scheduler. - -It is recommended to limit the Nextflow Java virtual machines memory. We recommend adding the following line to your environment (typically in `~/.bashrc` or `~./bash_profile`): - -```bash -NXF_OPTS='-Xms1g -Xmx4g' -``` - -## Appendices - -#### Running on UPPMAX -To run the pipeline on the [Swedish UPPMAX](https://www.uppmax.uu.se/) clusters (`rackham`, `irma`, `bianca` etc), use the command line flag `-profile uppmax`. This tells Nextflow to submit jobs using the SLURM job executor with Singularity for software dependencies. 
- -Note that you will need to specify your UPPMAX project ID when running a pipeline. To do this, use the command line flag `--project `. The pipeline will exit with an error message if you try to run it pipeline with the default UPPMAX config profile without a project. - -**Optional Extra:** To avoid having to specify your project every time you run Nextflow, you can add it to your personal Nextflow config file instead. Add this line to `~/.nextflow/config`: - -```nextflow -params.project = 'project_ID' // eg. b2017123 -``` diff --git a/docs/output.md b/docs/output.md index f34f30053..6cd97cd95 100644 --- a/docs/output.md +++ b/docs/output.md @@ -50,4 +50,4 @@ The pipeline has special steps which allow the software versions used to be repo * `Project_multiqc_data/` * Directory containing parsed statistics from the different tools used in the pipeline -For more information about how to use MultiQC reports, see http://multiqc.info +For more information about how to use MultiQC reports, see [http://multiqc.info](http://multiqc.info) diff --git a/docs/usage.md b/docs/usage.md index 436f92e9e..de8212695 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -2,7 +2,10 @@ ## Table of contents -* [Introduction](#general-nextflow-info) + + +* [Table of contents](#table-of-contents) +* [Introduction](#introduction) * [Running the pipeline](#running-the-pipeline) * [Updating the pipeline](#updating-the-pipeline) * [Reproducibility](#reproducibility) @@ -84,12 +87,14 @@ For more details on how to set up your own private profile, please see [installa **Basic profiles** These are basic profiles which primarily define where you derive the pipeline's software packages from. These are typically the profiles you would use if you are running the pipeline on your **own PC** (vs. a HPC cluster - see below). -* `standard` - * The default profile, used if `-profile` is not specified at all. - * Runs locally and expects all software to be installed and available on the `PATH`. 
+* `awsbatch` + * A generic configuration profile to be used with AWS Batch. +* `conda` + * A generic configuration profile to be used with [conda](https://conda.io/docs/) + * Pulls most software from [Bioconda](https://bioconda.github.io/) * `docker` - * A generic configuration profile to be used with [Docker](http://docker.com/) - * Pulls software from dockerhub: [`nfcore/eager`](http://hub.docker.com/r/nfcore/eager/) + * A generic configuration profile to be used with [Docker](http://docker.com/) + * Pulls software from dockerhub: [`nfcore/eager`](http://hub.docker.com/r/nfcore/eager/) * `singularity` * A generic configuration profile to be used with [Singularity](http://singularity.lbl.gov/) * Pulls software from singularity-hub @@ -272,10 +277,14 @@ Specify the path to a specific nextflow config file (this is a core NextFlow com **NB:** Single hyphen (core Nextflow option) -Note - you can use this to override defaults. For example, you can specify a config file using `-c` that contains the following: +Note - you can use this to override pipeline defaults. -```nextflow -process.$multiqc.module = [] +### `--custom_config_version` +Provide git commit id for custom Institutional configs hosted at `nf-core/configs`. This was implemented for reproducibility purposes. Default is set to `master`. + +```bash +## Download and use config file with the following git commit id +--custom_config_version d52db660777c4bf36546ddb188ec530c3ada1b96 ``` ### `--plaintext_email` Set to receive plain-text e-mails instead of HTML formatted. @@ -525,5 +534,10 @@ If you're ready, you can then remove the files with ```bash nextflow clean -f ``` - This will make your system administrator very happy as you will _halve_ the harddrive footprint of the run, so be sure to do this! + +### `--monochrome_logs` +Set to disable colourful command line output and live life in monochrome. + +### `--multiqc_config` +Specify a path to a custom MultiQC configuration file. 
diff --git a/environment.yml b/environment.yml index c609c6b9a..48897437f 100644 --- a/environment.yml +++ b/environment.yml @@ -9,18 +9,18 @@ dependencies: - bioconda::adapterremoval=2.2.2 - bioconda::adapterremovalfixprefix=0.0.4 - bioconda::bwa=0.7.17 - - bioconda::picard=2.18.27 + - bioconda::picard=2.19.0 - bioconda::samtools=1.9 - bioconda::dedup=0.12.5 - bioconda::angsd=0.923 - bioconda::circularmapper=1.93.4 - - bioconda::gatk4=4.1.0.0 + - bioconda::gatk4=4.1.1.0 - bioconda::qualimap=2.2.2b - bioconda::vcf2genome=0.91 - - bioconda::damageprofiler=0.4.4 + - bioconda::damageprofiler=0.4.5 - bioconda::multiqc=1.7 - bioconda::pmdtools=0.60 - - conda-forge::r-rmarkdown=1.11 + - conda-forge::r-rmarkdown=1.12 - conda-forge::libiconv=1.15 - conda-forge::pigz=2.3.4 - bioconda::sequencetools=1.2.2 diff --git a/main.nf b/main.nf index 26a93a981..967de9448 100644 --- a/main.nf +++ b/main.nf @@ -10,10 +10,12 @@ Alexander Peltzer apeltzer - https://github.com/apeltzer> James A. Fellows Yates - https://github.com/jfy133 Stephen Clayton - https://github.com/sc13-bioinf + Maxime Borry - https://github.com/maxibor ======================================================================================== */ def helpMessage() { + log.info nfcoreHeader() log.info""" ========================================= eager v${workflow.manifest.version} @@ -108,6 +110,7 @@ def helpMessage() { --outdir The output directory where the results will be saved --email Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits --plaintext_email Receive plain text emails rather than HTML + --maxMultiqcEmailFileSize Threshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB) -name Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic. --max_memory Memory limit for each step of pipeline. 
Should be in form e.g. --max_memory '8.GB' --max_time Time limit for each step of the pipeline. Should be in form e.g. --max_memory '2.h' @@ -213,11 +216,6 @@ params.bamutils_clip_left = 1 params.bamutils_clip_right = 1 params.bamutils_softclip = false - - - -ch_multiqc_config = Channel.fromPath(params.multiqc_config) -ch_output_docs = Channel.fromPath("$baseDir/docs/output.md") Channel.fromPath("$baseDir/assets/where_are_my_files.txt") .into{ ch_where_for_bwa_index; ch_where_for_fasta_index; ch_where_for_seqdict} @@ -270,14 +268,6 @@ if (params.skip_collapse && params.singleEnd){ exit 1, "--skip_collapse can only be set for pairedEnd samples!" } -//AWSBatch sanity checking -if(workflow.profile == 'awsbatch'){ - if (!params.awsqueue || !params.awsregion) exit 1, "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" - if (!workflow.workDir.startsWith('s3') || !params.outdir.startsWith('s3')) exit 1, "Specify S3 URLs for workDir and outdir parameters on AWSBatch!" -} - - - // Has the run name been specified by the user? // this has the bonus effect of catching both -name and --name custom_runName = params.name @@ -285,6 +275,20 @@ if( !(workflow.runName ==~ /[a-z]+_[a-z]+/) ){ custom_runName = workflow.runName } +if( workflow.profile == 'awsbatch') { + // AWSBatch sanity checking + if (!params.awsqueue || !params.awsregion) exit 1, "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" + // Check outdir paths to be S3 buckets if running on AWSBatch + // related: https://github.com/nextflow-io/nextflow/issues/813 + if (!params.outdir.startsWith('s3:')) exit 1, "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" + // Prevent trace files to be stored on S3 since S3 does not support rolling files. + if (workflow.tracedir.startsWith('s3:')) exit 1, "Specify a local tracedir or run without trace! S3 cannot be used for tracefiles." 
+} + +// Stage config files +ch_multiqc_config = Channel.fromPath(params.multiqc_config) +ch_output_docs = Channel.fromPath("$baseDir/docs/output.md") + /* * Create a channel for input read files * Dump can be used for debugging purposes, e.g. using the -dump-channels operator on run @@ -342,9 +346,7 @@ if( params.readPaths ){ } // Header log info -log.info "=========================================" -log.info " nf-core/eager v${workflow.manifest.version}" -log.info "=========================================" +log.info nfcoreHeader() def summary = [:] summary['Pipeline Name'] = 'nf-core/eager' summary['Pipeline Version'] = workflow.manifest.version @@ -371,16 +373,25 @@ summary['Output dir'] = params.outdir summary['Script dir'] = workflow.projectDir summary['Config Profile'] = workflow.profile if(workflow.profile == 'awsbatch'){ - summary['AWS Region'] = params.awsregion - summary['AWS Queue'] = params.awsqueue + summary['AWS Region'] = params.awsregion + summary['AWS Queue'] = params.awsqueue } if(params.email) summary['E-mail Address'] = params.email -log.info summary.collect { k,v -> "${k.padRight(35)}: $v" }.join("\n") -log.info "=========================================" +summary['Config Profile'] = workflow.profile +if(params.config_profile_description) summary['Config Description'] = params.config_profile_description +if(params.config_profile_contact) summary['Config Contact'] = params.config_profile_contact +if(params.config_profile_url) summary['Config URL'] = params.config_profile_url +if(params.email) { + summary['E-mail Address'] = params.email + summary['MultiQC maxsize'] = params.maxMultiqcEmailFileSize +} +log.info summary.collect { k,v -> "${k.padRight(18)}: $v" }.join("\n") +log.info "\033[2m----------------------------------------------------\033[0m" +// Check the hostnames against configured profiles +checkHostname() def create_workflow_summary(summary) { - def yaml_file = workDir.resolve('workflow_summary_mqc.yaml') yaml_file.text = """ id: 
'nf-core-eager-summary' @@ -1276,10 +1287,25 @@ workflow.onComplete { if(workflow.repository) email_fields['summary']['Pipeline repository Git URL'] = workflow.repository if(workflow.commitId) email_fields['summary']['Pipeline repository Git Commit'] = workflow.commitId if(workflow.revision) email_fields['summary']['Pipeline Git branch/tag'] = workflow.revision + if(workflow.container) email_fields['summary']['Docker image'] = workflow.container email_fields['summary']['Nextflow Version'] = workflow.nextflow.version email_fields['summary']['Nextflow Build'] = workflow.nextflow.build email_fields['summary']['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + // On success try attach the multiqc report + def mqc_report = null + try { + if (workflow.success) { + mqc_report = multiqc_report.getVal() + if (mqc_report.getClass() == ArrayList){ + log.warn "[nf-core/eager] Found multiple reports from process 'multiqc', will use only one" + mqc_report = mqc_report[0] + } + } + } catch (all) { + log.warn "[nf-core/eager] Could not attach MultiQC report to summary email" + } + // Render the TXT template def engine = new groovy.text.GStringTemplateEngine() def tf = new File("$baseDir/assets/email_template.txt") @@ -1292,7 +1318,7 @@ workflow.onComplete { def email_html = html_template.toString() // Render the sendmail template - def smail_fields = [ email: params.email, subject: subject, email_txt: email_txt, email_html: email_html, baseDir: "$baseDir" ] + def smail_fields = [ email: params.email, subject: subject, email_txt: email_txt, email_html: email_html, baseDir: "$baseDir", mqcFile: mqc_report, mqcMaxSize: params.maxMultiqcEmailFileSize.toBytes() ] def sf = new File("$baseDir/assets/sendmail_template.txt") def sendmail_template = engine.createTemplate(sf).make(smail_fields) def sendmail_html = sendmail_template.toString() @@ -1312,7 +1338,7 @@ workflow.onComplete { } // Write summary e-mail HTML to a file - def output_d = new File( 
"${params.outdir}/Documentation/" ) + def output_d = new File( "${params.outdir}/pipeline_info/" ) if( !output_d.exists() ) { output_d.mkdirs() } @@ -1321,5 +1347,67 @@ workflow.onComplete { def output_tf = new File( output_d, "pipeline_report.txt" ) output_tf.withWriter { w -> w << email_txt } - log.info "[nf-core/eager] Pipeline Complete" + c_reset = params.monochrome_logs ? '' : "\033[0m"; + c_purple = params.monochrome_logs ? '' : "\033[0;35m"; + c_green = params.monochrome_logs ? '' : "\033[0;32m"; + c_red = params.monochrome_logs ? '' : "\033[0;31m"; + + if (workflow.stats.ignoredCountFmt > 0 && workflow.success) { + log.info "${c_purple}Warning, pipeline completed, but with errored process(es) ${c_reset}" + log.info "${c_red}Number of ignored errored process(es) : ${workflow.stats.ignoredCountFmt} ${c_reset}" + log.info "${c_green}Number of successfully ran process(es) : ${workflow.stats.succeedCountFmt} ${c_reset}" + } + + if(workflow.success){ + log.info "${c_purple}[nf-core/eager]${c_green} Pipeline completed successfully${c_reset}" + } else { + checkHostname() + log.info "${c_purple}[nf-core/eager]${c_red} Pipeline completed with errors${c_reset}" + } + +} + + +def nfcoreHeader(){ + // Log colors ANSI codes + c_reset = params.monochrome_logs ? '' : "\033[0m"; + c_dim = params.monochrome_logs ? '' : "\033[2m"; + c_black = params.monochrome_logs ? '' : "\033[0;30m"; + c_green = params.monochrome_logs ? '' : "\033[0;32m"; + c_yellow = params.monochrome_logs ? '' : "\033[0;33m"; + c_blue = params.monochrome_logs ? '' : "\033[0;34m"; + c_purple = params.monochrome_logs ? '' : "\033[0;35m"; + c_cyan = params.monochrome_logs ? '' : "\033[0;36m"; + c_white = params.monochrome_logs ? 
'' : "\033[0;37m"; + + return """ ${c_dim}----------------------------------------------------${c_reset} + ${c_green},--.${c_black}/${c_green},-.${c_reset} + ${c_blue} ___ __ __ __ ___ ${c_green}/,-._.--~\'${c_reset} + ${c_blue} |\\ | |__ __ / ` / \\ |__) |__ ${c_yellow}} {${c_reset} + ${c_blue} | \\| | \\__, \\__/ | \\ |___ ${c_green}\\`-._,-`-,${c_reset} + ${c_green}`._,._,\'${c_reset} + ${c_purple} nf-core/eager v${workflow.manifest.version}${c_reset} + ${c_dim}----------------------------------------------------${c_reset} + """.stripIndent() +} + +def checkHostname(){ + def c_reset = params.monochrome_logs ? '' : "\033[0m" + def c_white = params.monochrome_logs ? '' : "\033[0;37m" + def c_red = params.monochrome_logs ? '' : "\033[1;91m" + def c_yellow_bold = params.monochrome_logs ? '' : "\033[1;93m" + if(params.hostnames){ + def hostname = "hostname".execute().text.trim() + params.hostnames.each { prof, hnames -> + hnames.each { hname -> + if(hostname.contains(hname) && !workflow.profile.contains(prof)){ + log.error "====================================================\n" + + " ${c_red}WARNING!${c_reset} You are running with `-profile $workflow.profile`\n" + + " but your machine hostname is ${c_white}'$hostname'${c_reset}\n" + + " ${c_yellow_bold}It's highly recommended that you use `-profile $prof${c_reset}`\n" + + "============================================================" + } + } + } + } } diff --git a/nextflow.config b/nextflow.config index bbaad2639..2a8e65c01 100644 --- a/nextflow.config +++ b/nextflow.config @@ -3,9 +3,6 @@ * nf-core/eager Nextflow config file * ------------------------------------------------- * Default config options for all environments. - * Cluster-specific config options should be saved - * in the conf folder and imported under a profile - * name here. 
*/ // Global default params, used in configs params { @@ -18,11 +15,10 @@ params { saveAlignedIntermediates = false singleEnd = false reads = "data/*{1,2}.fastq.gz" - outdir = './results' - tracedir = "${params.outdir}/pipeline_info" readPaths = false bam = false large_ref = false + outdir = './results' //More defaults complexity_filter = false @@ -34,20 +30,31 @@ params { skip_trim = false skip_adapterremoval = false - // AWS Batch + // Boilerplate options + name = false + multiqc_config = "$baseDir/assets/multiqc_config.yaml" + email = false + maxMultiqcEmailFileSize = 25.MB + plaintext_email = false + monochrome_logs = false + help = false + igenomes_base = "./iGenomes" + tracedir = "${params.outdir}/pipeline_info" awsqueue = false awsregion = 'eu-west-1' - igenomesIgnore = false custom_config_version = 'master' custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + hostnames = false + config_profile_description = false + config_profile_contact = false + config_profile_url = false } // Container slug. Stable releases should specify release tag! 
// Developmental code should specify :dev process.container = 'nfcore/eager:dev' - // Load base.config by default for all pipelines includeConfig 'conf/base.config' @@ -82,30 +89,29 @@ process.shell = ['/bin/bash', '-euo', 'pipefail'] timeline { enabled = true - file = "${params.tracedir}/nf-core/eager_timeline.html" + file = "${params.tracedir}/execution_timeline.html" } report { enabled = true - file = "${params.tracedir}/nf-core/eager_report.html" + file = "${params.tracedir}/execution_report.html" } trace { enabled = true - file = "${params.tracedir}/nf-core/eager_trace.txt" + file = "${params.tracedir}/execution_trace.txt" } dag { enabled = true - file = "${params.tracedir}/nf-core/eager_dag.svg" + file = "${params.tracedir}/pipeline_dag.svg" } manifest { name = 'nf-core/eager' - author = 'Alexander Peltzer, Stephen Clayton, James A Fellows-Yates' + author = 'Alexander Peltzer, Stephen Clayton, James A. Fellows Yates, Maxime Borry' homePage = 'https://github.com/nf-core/eager' version = '2.0.7dev' - description = 'A fully reproducible and modern ancient DNA pipeline in Nextflow and with cloud support.' + description = 'A fully reproducible ancient and modern DNA pipeline in Nextflow and with cloud support.' mainScript = 'main.nf' nextflowVersion = '>=0.32.0' - } // Function to ensure that resource requirements don't go beyond