Skip to content
This repository was archived by the owner on Jan 27, 2020. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
- [#671](https://github.com/SciLifeLab/Sarek/pull/671) - New `publishDirMode` param and docs
- [#673](https://github.com/SciLifeLab/Sarek/pull/673), [#675](https://github.com/SciLifeLab/Sarek/pull/675), [#676](https://github.com/SciLifeLab/Sarek/pull/676) - Profiles for BinAC and CFC clusters in Tübingen
- [#679](https://github.com/SciLifeLab/Sarek/pull/679) - Add container for `CreateIntervalBeds`
- [#692](https://github.com/SciLifeLab/Sarek/pull/692) - Add AWS iGenomes possibilities (currently under `iGRCh37` and `iGRCh38`)
- [#692](https://github.com/SciLifeLab/Sarek/pull/692), [#697](https://github.com/SciLifeLab/Sarek/pull/697) - Add AWS iGenomes possibilities (within `conf/igenomes.config`)
- [#694](https://github.com/SciLifeLab/Sarek/pull/694) - Add monochrome and grey logos for light or dark background

### `Changed`
Expand Down
2 changes: 1 addition & 1 deletion conf/aws-batch.config
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
*/

params {
genome_base = params.genome == 'iGRCh37' ? "s3://ngi-igenomes/igenomes/Homo_sapiens/GATK/GRCh37" : params.genome == 'iGRCh38' ? "s3://ngi-igenomes/igenomes/Homo_sapiens/GATK/GRCh38" : "s3://sarek-references/small"
genome_base = params.genome == 'GRCh37' ? "s3://ngi-igenomes/igenomes/Homo_sapiens/GATK/GRCh37" : params.genome == 'GRCh38' ? "s3://ngi-igenomes/igenomes/Homo_sapiens/GATK/GRCh38" : "s3://sarek-references/small"
publishDirMode = 'copy'
}

Expand Down
1 change: 0 additions & 1 deletion conf/base.config
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
* -------------------------------------------------
*/

includeConfig 'genomes.config'
wf_repository = 'maxulysse'

params {
Expand Down
32 changes: 4 additions & 28 deletions conf/genomes.config
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@
* -------------------------------------------------
* Path to reference files
* -------------------------------------------------
* Imported under all Nextflow profiles in
* Imported under Nextflow profiles in
* nextflow.config
* -------------------------------------------------
* Modify to add specific versions of genomes
* Defines reference genomes, using paths
* Can be used by any config that customises the base
* path using $params.genome_base / --genome_base
* -------------------------------------------------
*/

Expand Down Expand Up @@ -42,32 +44,6 @@ params {
//AF_files = "${params.genome_base}/{00-All.dbsnp_151.hg38.CAF.TOPMED.alternate.allele.freq,hapmap_3.3_grch38_pop_stratified_af.HMAF,SweGen_hg38_stratified.SWAF}.vcf"
//AF_indexes = "${params.genome_base}/{00-All.dbsnp_151.hg38.CAF.TOPMED.alternate.allele.freq,hapmap_3.3_grch38_pop_stratified_af.HMAF,SweGen_hg38_stratified.SWAF}.vcf.idx"
}
'iGRCh37' {
acLoci = "${params.genome_base}/Annotation/ASCAT/1000G_phase3_20130502_SNP_maf0.3.loci"
dbsnp = "${params.genome_base}/Annotation/GATKBundle/dbsnp_138.b37.vcf"
dbsnpIndex = "${params.genome_base}/Annotation/GATKBundle/dbsnp_138.b37.vcf.idx"
genomeFile = "${params.genome_base}/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta"
genomeDict = "${params.genome_base}/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.dict"
genomeIndex = "${params.genome_base}/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta.fai"
bwaIndex = "${params.genome_base}/Sequence/BWAIndex/human_g1k_v37_decoy.fasta.{amb,ann,bwt,pac,sa}"
intervals = "${params.genome_base}/Annotation/intervals/wgs_calling_regions_CAW.list"
knownIndels = "${params.genome_base}/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf"
knownIndelsIndex = "${params.genome_base}/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.idx"
snpeffDb = "GRCh37.75"
}
'iGRCh38' {
acLoci = "${params.genome_base}/Annotation/ASCAT/1000G_phase3_GRCh38_maf0.3.loci"
dbsnp = "${params.genome_base}/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz"
dbsnpIndex = "${params.genome_base}/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz.tbi"
genomeFile = "${params.genome_base}/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta"
genomeDict = "${params.genome_base}/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.dict"
genomeIndex = "${params.genome_base}/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta.fai"
bwaIndex = "${params.genome_base}/Sequence/BWAIndex/Homo_sapiens_assembly38.fasta.64.{alt,amb,ann,bwt,pac,sa}"
intervals = "${params.genome_base}/Annotation/intervals/wgs_calling_regions.hg38.bed"
knownIndels = "${params.genome_base}/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz"
knownIndelsIndex = "${params.genome_base}/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz.tbi"
snpeffDb = "GRCh38.86"
}
'smallGRCh37' {
acLoci = "${params.genome_base}/1000G_phase3_20130502_SNP_maf0.3.small.loci"
dbsnp = "${params.genome_base}/dbsnp_138.b37.small.vcf"
Expand Down
58 changes: 58 additions & 0 deletions conf/igenomes.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* -------------------------------------------------
* Nextflow config file for Sarek
* -------------------------------------------------
* Path to iGenomes reference files
* -------------------------------------------------
* Imported under Nextflow profiles in
* nextflow.config
* -------------------------------------------------
* Defines reference genomes, using iGenome paths
* Can be used by any config that customises the base
* path using $params.genome_base / --genome_base
* -------------------------------------------------
*/

// Reference file locations per genome key; every path is built from params.genome_base.
params {
genomes {
// GRCh37: files are referenced under Annotation/ and Sequence/ below the base path.
'GRCh37' {
acLoci = "${params.genome_base}/Annotation/ASCAT/1000G_phase3_20130502_SNP_maf0.3.loci"
dbsnp = "${params.genome_base}/Annotation/GATKBundle/dbsnp_138.b37.vcf"
dbsnpIndex = "${params.genome_base}/Annotation/GATKBundle/dbsnp_138.b37.vcf.idx"
genomeFile = "${params.genome_base}/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta"
genomeDict = "${params.genome_base}/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.dict"
genomeIndex = "${params.genome_base}/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta.fai"
bwaIndex = "${params.genome_base}/Sequence/BWAIndex/human_g1k_v37_decoy.fasta.{amb,ann,bwt,pac,sa}"
intervals = "${params.genome_base}/Annotation/intervals/wgs_calling_regions_CAW.list"
knownIndels = "${params.genome_base}/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf"
knownIndelsIndex = "${params.genome_base}/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.idx"
snpeffDb = "GRCh37.75"
}
// GRCh38: same Annotation/ and Sequence/ layout; VCFs are referenced as .vcf.gz with .tbi indexes.
'GRCh38' {
acLoci = "${params.genome_base}/Annotation/ASCAT/1000G_phase3_GRCh38_maf0.3.loci"
dbsnp = "${params.genome_base}/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz"
dbsnpIndex = "${params.genome_base}/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz.tbi"
genomeFile = "${params.genome_base}/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta"
genomeDict = "${params.genome_base}/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.dict"
genomeIndex = "${params.genome_base}/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta.fai"
bwaIndex = "${params.genome_base}/Sequence/BWAIndex/Homo_sapiens_assembly38.fasta.64.{alt,amb,ann,bwt,pac,sa}"
intervals = "${params.genome_base}/Annotation/intervals/wgs_calling_regions.hg38.bed"
knownIndels = "${params.genome_base}/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz"
knownIndelsIndex = "${params.genome_base}/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz.tbi"
snpeffDb = "GRCh38.86"
}
// smallGRCh37: files are referenced directly under the base path, with no subdirectories.
'smallGRCh37' {
acLoci = "${params.genome_base}/1000G_phase3_20130502_SNP_maf0.3.small.loci"
dbsnp = "${params.genome_base}/dbsnp_138.b37.small.vcf"
// Built by interpolating the dbsnp value assigned on the previous line.
dbsnpIndex = "${dbsnp}.idx"
genomeFile = "${params.genome_base}/human_g1k_v37_decoy.small.fasta"
// bwaIndex and genomeIndex below interpolate genomeFile, so genomeFile must stay above them.
bwaIndex = "${genomeFile}.{amb,ann,bwt,pac,sa}"
genomeDict = "${params.genome_base}/human_g1k_v37_decoy.small.dict"
genomeIndex = "${genomeFile}.fai"
intervals = "${params.genome_base}/small.intervals"
knownIndels = "${params.genome_base}/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.small.vcf"
knownIndelsIndex = "${params.genome_base}/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.small.vcf.idx"
snpeffDb = "GRCh37.75"
}
}
}
28 changes: 19 additions & 9 deletions docs/REFERENCES.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
# Genomes and reference files

Sarek currently uses GRCh38 by default. The settings are in `genomes.config`, they can be tailored to your needs.
Sarek currently uses GRCh38 by default.
The settings are in `genomes.config`, they can be tailored to your needs.
The [`buildReferences.nf`](#buildreferencesnf) script is used to build the indexes for the reference test.

## GRCh37

Use `--genome GRCh37` to map against GRCh37. Before doing so and if you are not on UPPMAX, you need to adjust the settings in `genomes.config` to your needs.
Use `--genome GRCh37` to map against GRCh37.
Before doing so and if you are not on UPPMAX, you need to adjust the settings in `genomes.config` to your needs.

### GATK bundle

Expand All @@ -20,21 +22,27 @@ The following files need to be downloaded:

### Other files for GRCh37

From our repo, get the [`intervals` list file](https://raw.githubusercontent.com/SciLifeLab/Sarek/master/repeats/wgs_calling_regions.grch37.list). More information about this file in the [intervals documentation](INTERVALS.md)
From our repo, get the [`intervals` list file](https://raw.githubusercontent.com/SciLifeLab/Sarek/master/repeats/wgs_calling_regions.grch37.list).
More information about this file can be found in the [intervals documentation](INTERVALS.md).

How to generate the Loci file used in the ASCAT process is described [here](https://github.com/SciLifeLab/Sarek/blob/master/docs/ASCAT.md).

You can create your own cosmic reference for any human reference as specified below in the Cosmic section.

## GRCh38

Use `--genome GRCh38` to map against GRCh38. Before doing so and if you are not on UPPMAX, you need to adjust the settings in `genomes.config` to your needs.
Use `--genome GRCh38` to map against GRCh38.
Before doing so and if you are not on UPPMAX, you need to adjust the settings in `genomes.config` to your needs.

To get the needed files, download the GATK bundle for GRCh38 from [ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/hg38/](ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/hg38/). You can also download the required files from the Google Cloud mirror link [here](https://console.cloud.google.com/storage/browser/genomics-public-data/resources/broad/hg38/v0).
To get the needed files, download the GATK bundle for GRCh38 from [ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/hg38/](ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/hg38/).
You can also download the required files from the Google Cloud mirror link [here](https://console.cloud.google.com/storage/browser/genomics-public-data/resources/broad/hg38/v0).

The MD5SUM of `Homo_sapiens_assembly38.fasta` included in that file is 7ff134953dcca8c8997453bbb80b6b5e.

If you download the data from the FTP servers `beta/` directory, which seems to be an older version of the bundle, only `Homo_sapiens_assembly38.known_indels.vcf` is needed. Also, you can omit `dbsnp_138_` and `dbsnp_144` files as we use `dbsnp_146`. The old ones also use the wrong chromosome naming convention. The Google Cloud mirror has all data in the `v0` directory, but requires you to remove the `resources_broad_hg38_v0_` prefixes from all files.
If you download the data from the FTP server's `beta/` directory, which seems to be an older version of the bundle, only `Homo_sapiens_assembly38.known_indels.vcf` is needed.
Also, you can omit `dbsnp_138_` and `dbsnp_144` files as we use `dbsnp_146`.
The old ones also use the wrong chromosome naming convention.
The Google Cloud mirror has all data in the `v0` directory, but requires you to remove the `resources_broad_hg38_v0_` prefixes from all files.

The following files need to be downloaded:

Expand Down Expand Up @@ -68,7 +76,8 @@ You can create your own cosmic reference for any human reference as specified be
To annotate with COSMIC variants during MuTect1/2 Variant Calling you need to create a compatible VCF file.
Download the coding and non-coding VCF files from [COSMIC](http://cancer.sanger.ac.uk/cosmic/download) and
process them with the [Create\_Cosmic.sh](https://github.com/SciLifeLab/Sarek/tree/master/scripts/Create_Cosmic.sh)
script for either GRCh37 or GRCh38. The script requires a fasta index `.fai`, of the reference file you are using.
script for either GRCh37 or GRCh38.
The script requires a fasta index `.fai`, of the reference file you are using.

Example:

Expand All @@ -87,11 +96,12 @@ igvtools index <cosmicvxx.vcf>

## smallGRCh37

Use `--genome smallGRCh37` to map against a small reference genome based on GRCh37. `smallGRCh37` is the default genome for the testing profile (`-profile testing`).
Use `--genome smallGRCh37` to map against a small reference genome based on GRCh37.
`smallGRCh37` is the default genome for the testing profile (`-profile testing`).

## AWS iGenomes
Sarek is using [AWS iGenomes](https://ewels.github.io/AWS-iGenomes/), which facilitate storing and sharing references.
Both `GRCh37` and `GRCh38` are available with `--genome iGRCh37` or `--genome iGRCh38` respectively, it contains all data previously detailed.
Both `GRCh37` and `GRCh38` are available with `--genome GRCh37` or `--genome GRCh38` respectively, with any profile using the `conf/igenomes.config` file (e.g. `awsbatch`); you can also specify it explicitly with `-c conf/igenomes.config`. AWS iGenomes contains all the data previously detailed.

## buildReferences.nf

Expand Down
13 changes: 11 additions & 2 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ profiles {
// Singularity images need to be set up
standard {
includeConfig 'conf/base.config'
includeConfig 'conf/genomes.config'
includeConfig 'conf/uppmax-localhost.config'
includeConfig 'conf/singularity-path.config'
}
Expand All @@ -36,6 +37,7 @@ profiles {
// Singularity images need to be set up
slurm {
includeConfig 'conf/base.config'
includeConfig 'conf/genomes.config'
includeConfig 'conf/uppmax-slurm.config'
includeConfig 'conf/singularity-path.config'
}
Expand All @@ -44,6 +46,7 @@ profiles {
// Singularity images will be pulled automatically
slurmDownload {
includeConfig 'conf/base.config'
includeConfig 'conf/genomes.config'
includeConfig 'conf/uppmax-slurm.config'
includeConfig 'conf/singularity.config'
includeConfig 'conf/containers.config'
Expand All @@ -52,6 +55,7 @@ profiles {
// Docker images will be pulled automatically
docker {
includeConfig 'conf/base.config'
includeConfig 'conf/genomes.config'
includeConfig 'conf/travis.config'
includeConfig 'conf/docker.config'
includeConfig 'conf/containers.config'
Expand All @@ -60,6 +64,7 @@ profiles {
// Docker images will be pulled automatically
awsbatch {
includeConfig 'conf/base.config'
includeConfig 'conf/igenomes.config'
includeConfig 'conf/aws-batch.config'
includeConfig 'conf/docker.config'
includeConfig 'conf/containers.config'
Expand All @@ -68,6 +73,7 @@ profiles {
// Singularity images will be pulled automatically
singularity {
includeConfig 'conf/base.config'
includeConfig 'conf/genomes.config'
includeConfig 'conf/travis.config'
includeConfig 'conf/singularity.config'
includeConfig 'conf/containers.config'
Expand All @@ -76,6 +82,7 @@ profiles {
// Singularity images need to be set up
singularityPath {
includeConfig 'conf/base.config'
includeConfig 'conf/genomes.config'
includeConfig 'conf/travis.config'
includeConfig 'conf/singularity-path.config'
}
Expand All @@ -85,14 +92,16 @@ profiles {
// Singularity images will be pulled automatically
binac {
includeConfig 'conf/base.config'
includeConfig 'conf/genomes.config'
includeConfig 'conf/binac.config'
includeConfig 'conf/singularity.config'
includeConfig 'conf/resources.config'
includeConfig 'conf/containers.config'
}
// Default config for CFC cluster in Tuebingen/Germany
// Default config for CFC cluster in Tuebingen/Germany
cfc {
includeConfig 'conf/base.config'
includeConfig 'conf/genomes.config'
includeConfig 'conf/cfc.config'
includeConfig 'conf/singularity.config'
includeConfig 'conf/resources.config'
Expand Down Expand Up @@ -132,4 +141,4 @@ def check_max(obj, type) {
return obj
}
}
}
}