Skip to content
This repository was archived by the owner on Jan 27, 2020. It is now read-only.

Commit df83874

Browse files
authored
Merge pull request #697 from MaxUlysse/iGenomes
iGenomes specific config file
2 parents 347bebb + d4b3a1d commit df83874

7 files changed

Lines changed: 94 additions & 42 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
1212
- [#671](https://github.com/SciLifeLab/Sarek/pull/671) - New `publishDirMode` param and docs
1313
- [#673](https://github.com/SciLifeLab/Sarek/pull/673), [#675](https://github.com/SciLifeLab/Sarek/pull/675), [#676](https://github.com/SciLifeLab/Sarek/pull/676) - Profiles for BinAC and CFC clusters in Tübingen
1414
- [#679](https://github.com/SciLifeLab/Sarek/pull/679) - Add container for `CreateIntervalBeds`
15-
- [#692](https://github.com/SciLifeLab/Sarek/pull/692) - Add AWS iGenomes possibilities (currently under `iGRCh37` and `iGRCh38`)
15+
- [#692](https://github.com/SciLifeLab/Sarek/pull/692), [#697](https://github.com/SciLifeLab/Sarek/pull/697) - Add AWS iGenomes possibilities (within `conf/igenomes.config`)
1616
- [#694](https://github.com/SciLifeLab/Sarek/pull/694) - Add monochrome and grey logos for light or dark background
1717

1818
### `Changed`

conf/aws-batch.config

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*/
99

1010
params {
11-
genome_base = params.genome == 'iGRCh37' ? "s3://ngi-igenomes/igenomes/Homo_sapiens/GATK/GRCh37" : params.genome == 'iGRCh38' ? "s3://ngi-igenomes/igenomes/Homo_sapiens/GATK/GRCh38" : "s3://sarek-references/small"
11+
genome_base = params.genome == 'GRCh37' ? "s3://ngi-igenomes/igenomes/Homo_sapiens/GATK/GRCh37" : params.genome == 'GRCh38' ? "s3://ngi-igenomes/igenomes/Homo_sapiens/GATK/GRCh38" : "s3://sarek-references/small"
1212
publishDirMode = 'copy'
1313
}
1414

conf/base.config

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
* -------------------------------------------------
77
*/
88

9-
includeConfig 'genomes.config'
109
wf_repository = 'maxulysse'
1110

1211
params {

conf/genomes.config

Lines changed: 4 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,12 @@
44
* -------------------------------------------------
55
* Path to reference files
66
* -------------------------------------------------
7-
* Imported under all Nextflow profiles in
7+
* Imported under Nextflow profiles in
88
* nextflow.config
99
* -------------------------------------------------
10-
* Modify to add specific versions of genomes
10+
* Defines reference genomes, using paths
11+
* Can be used by any config that customises the base
12+
* path using $params.genome_base / --genome_base
1113
* -------------------------------------------------
1214
*/
1315

@@ -42,32 +44,6 @@ params {
4244
//AF_files = "${params.genome_base}/{00-All.dbsnp_151.hg38.CAF.TOPMED.alternate.allele.freq,hapmap_3.3_grch38_pop_stratified_af.HMAF,SweGen_hg38_stratified.SWAF}.vcf"
4345
//AF_indexes = "${params.genome_base}/{00-All.dbsnp_151.hg38.CAF.TOPMED.alternate.allele.freq,hapmap_3.3_grch38_pop_stratified_af.HMAF,SweGen_hg38_stratified.SWAF}.vcf.idx"
4446
}
45-
'iGRCh37' {
46-
acLoci = "${params.genome_base}/Annotation/ASCAT/1000G_phase3_20130502_SNP_maf0.3.loci"
47-
dbsnp = "${params.genome_base}/Annotation/GATKBundle/dbsnp_138.b37.vcf"
48-
dbsnpIndex = "${params.genome_base}/Annotation/GATKBundle/dbsnp_138.b37.vcf.idx"
49-
genomeFile = "${params.genome_base}/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta"
50-
genomeDict = "${params.genome_base}/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.dict"
51-
genomeIndex = "${params.genome_base}/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta.fai"
52-
bwaIndex = "${params.genome_base}/Sequence/BWAIndex/human_g1k_v37_decoy.fasta.{amb,ann,bwt,pac,sa}"
53-
intervals = "${params.genome_base}/Annotation/intervals/wgs_calling_regions_CAW.list"
54-
knownIndels = "${params.genome_base}/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf"
55-
knownIndelsIndex = "${params.genome_base}/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.idx"
56-
snpeffDb = "GRCh37.75"
57-
}
58-
'iGRCh38' {
59-
acLoci = "${params.genome_base}/Annotation/ASCAT/1000G_phase3_GRCh38_maf0.3.loci"
60-
dbsnp = "${params.genome_base}/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz"
61-
dbsnpIndex = "${params.genome_base}/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz.tbi"
62-
genomeFile = "${params.genome_base}/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta"
63-
genomeDict = "${params.genome_base}/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.dict"
64-
genomeIndex = "${params.genome_base}/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta.fai"
65-
bwaIndex = "${params.genome_base}/Sequence/BWAIndex/Homo_sapiens_assembly38.fasta.64.{alt,amb,ann,bwt,pac,sa}"
66-
intervals = "${params.genome_base}/Annotation/intervals/wgs_calling_regions.hg38.bed"
67-
knownIndels = "${params.genome_base}/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz"
68-
knownIndelsIndex = "${params.genome_base}/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz.tbi"
69-
snpeffDb = "GRCh38.86"
70-
}
7147
'smallGRCh37' {
7248
acLoci = "${params.genome_base}/1000G_phase3_20130502_SNP_maf0.3.small.loci"
7349
dbsnp = "${params.genome_base}/dbsnp_138.b37.small.vcf"

conf/igenomes.config

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
/*
2+
* -------------------------------------------------
3+
* Nextflow config file for Sarek
4+
* -------------------------------------------------
5+
* Path to iGenomes reference files
6+
* -------------------------------------------------
7+
* Imported under Nextflow profiles in
8+
* nextflow.config
9+
* -------------------------------------------------
10+
* Defines reference genomes, using iGenome paths
11+
* Can be used by any config that customises the base
12+
* path using $params.genome_base / --genome_base
13+
* -------------------------------------------------
14+
*/
15+
16+
params {
17+
genomes {
18+
'GRCh37' {
19+
acLoci = "${params.genome_base}/Annotation/ASCAT/1000G_phase3_20130502_SNP_maf0.3.loci"
20+
dbsnp = "${params.genome_base}/Annotation/GATKBundle/dbsnp_138.b37.vcf"
21+
dbsnpIndex = "${params.genome_base}/Annotation/GATKBundle/dbsnp_138.b37.vcf.idx"
22+
genomeFile = "${params.genome_base}/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta"
23+
genomeDict = "${params.genome_base}/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.dict"
24+
genomeIndex = "${params.genome_base}/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta.fai"
25+
bwaIndex = "${params.genome_base}/Sequence/BWAIndex/human_g1k_v37_decoy.fasta.{amb,ann,bwt,pac,sa}"
26+
intervals = "${params.genome_base}/Annotation/intervals/wgs_calling_regions_CAW.list"
27+
knownIndels = "${params.genome_base}/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf"
28+
knownIndelsIndex = "${params.genome_base}/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.idx"
29+
snpeffDb = "GRCh37.75"
30+
}
31+
'GRCh38' {
32+
acLoci = "${params.genome_base}/Annotation/ASCAT/1000G_phase3_GRCh38_maf0.3.loci"
33+
dbsnp = "${params.genome_base}/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz"
34+
dbsnpIndex = "${params.genome_base}/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz.tbi"
35+
genomeFile = "${params.genome_base}/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta"
36+
genomeDict = "${params.genome_base}/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.dict"
37+
genomeIndex = "${params.genome_base}/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta.fai"
38+
bwaIndex = "${params.genome_base}/Sequence/BWAIndex/Homo_sapiens_assembly38.fasta.64.{alt,amb,ann,bwt,pac,sa}"
39+
intervals = "${params.genome_base}/Annotation/intervals/wgs_calling_regions.hg38.bed"
40+
knownIndels = "${params.genome_base}/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz"
41+
knownIndelsIndex = "${params.genome_base}/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz.tbi"
42+
snpeffDb = "GRCh38.86"
43+
}
44+
'smallGRCh37' {
45+
acLoci = "${params.genome_base}/1000G_phase3_20130502_SNP_maf0.3.small.loci"
46+
dbsnp = "${params.genome_base}/dbsnp_138.b37.small.vcf"
47+
dbsnpIndex = "${dbsnp}.idx"
48+
genomeFile = "${params.genome_base}/human_g1k_v37_decoy.small.fasta"
49+
bwaIndex = "${genomeFile}.{amb,ann,bwt,pac,sa}"
50+
genomeDict = "${params.genome_base}/human_g1k_v37_decoy.small.dict"
51+
genomeIndex = "${genomeFile}.fai"
52+
intervals = "${params.genome_base}/small.intervals"
53+
knownIndels = "${params.genome_base}/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.small.vcf"
54+
knownIndelsIndex = "${params.genome_base}/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.small.vcf.idx"
55+
snpeffDb = "GRCh37.75"
56+
}
57+
}
58+
}

docs/REFERENCES.md

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
# Genomes and reference files
22

3-
Sarek currently uses GRCh38 by default. The settings are in `genomes.config`, they can be tailored to your needs.
3+
Sarek currently uses GRCh38 by default.
4+
The settings are in `genomes.config`; they can be tailored to your needs.
45
The [`buildReferences.nf`](#buildreferencesnf) script is used to build the indexes for the reference test.
56

67
## GRCh37
78

8-
Use `--genome GRCh37` to map against GRCh37. Before doing so and if you are not on UPPMAX, you need to adjust the settings in `genomes.config` to your needs.
9+
Use `--genome GRCh37` to map against GRCh37.
10+
Before doing so and if you are not on UPPMAX, you need to adjust the settings in `genomes.config` to your needs.
911

1012
### GATK bundle
1113

@@ -20,21 +22,27 @@ The following files need to be downloaded:
2022

2123
### Other files for GRCh37
2224

23-
From our repo, get the [`intervals` list file](https://raw.githubusercontent.com/SciLifeLab/Sarek/master/repeats/wgs_calling_regions.grch37.list). More information about this file in the [intervals documentation](INTERVALS.md)
25+
From our repo, get the [`intervals` list file](https://raw.githubusercontent.com/SciLifeLab/Sarek/master/repeats/wgs_calling_regions.grch37.list).
26+
More information about this file is available in the [intervals documentation](INTERVALS.md).
2427

2528
How to generate the Loci file used in the ASCAT process is described [here](https://github.com/SciLifeLab/Sarek/blob/master/docs/ASCAT.md).
2629

2730
You can create your own cosmic reference for any human reference as specified below in the Cosmic section.
2831

2932
## GRCh38
3033

31-
Use `--genome GRCh38` to map against GRCh38. Before doing so and if you are not on UPPMAX, you need to adjust the settings in `genomes.config` to your needs.
34+
Use `--genome GRCh38` to map against GRCh38.
35+
Before doing so and if you are not on UPPMAX, you need to adjust the settings in `genomes.config` to your needs.
3236

33-
To get the needed files, download the GATK bundle for GRCh38 from [ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/hg38/](ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/hg38/). You can also download the required files from the Google Cloud mirror link [here](https://console.cloud.google.com/storage/browser/genomics-public-data/resources/broad/hg38/v0).
37+
To get the needed files, download the GATK bundle for GRCh38 from [ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/hg38/](ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/hg38/).
38+
You can also download the required files from the Google Cloud mirror link [here](https://console.cloud.google.com/storage/browser/genomics-public-data/resources/broad/hg38/v0).
3439

3540
The MD5SUM of `Homo_sapiens_assembly38.fasta` included in that file is 7ff134953dcca8c8997453bbb80b6b5e.
3641

37-
If you download the data from the FTP servers `beta/` directory, which seems to be an older version of the bundle, only `Homo_sapiens_assembly38.known_indels.vcf` is needed. Also, you can omit `dbsnp_138_` and `dbsnp_144` files as we use `dbsnp_146`. The old ones also use the wrong chromosome naming convention. The Google Cloud mirror has all data in the `v0` directory, but requires you to remove the `resources_broad_hg38_v0_` prefixes from all files.
42+
If you download the data from the FTP server's `beta/` directory, which seems to be an older version of the bundle, only `Homo_sapiens_assembly38.known_indels.vcf` is needed.
43+
Also, you can omit `dbsnp_138_` and `dbsnp_144` files as we use `dbsnp_146`.
44+
The old ones also use the wrong chromosome naming convention.
45+
The Google Cloud mirror has all data in the `v0` directory, but requires you to remove the `resources_broad_hg38_v0_` prefixes from all files.
3846

3947
The following files need to be downloaded:
4048

@@ -68,7 +76,8 @@ You can create your own cosmic reference for any human reference as specified be
6876
To annotate with COSMIC variants during MuTect1/2 Variant Calling you need to create a compatible VCF file.
6977
Download the coding and non-coding VCF files from [COSMIC](http://cancer.sanger.ac.uk/cosmic/download) and
7078
process them with the [Create\_Cosmic.sh](https://github.com/SciLifeLab/Sarek/tree/master/scripts/Create_Cosmic.sh)
71-
script for either GRCh37 or GRCh38. The script requires a fasta index `.fai`, of the reference file you are using.
79+
script for either GRCh37 or GRCh38.
80+
The script requires a fasta index (`.fai`) of the reference file you are using.
7281

7382
Example:
7483

@@ -87,11 +96,12 @@ igvtools index <cosmicvxx.vcf>
8796

8897
## smallGRCh37
8998

90-
Use `--genome smallGRCh37` to map against a small reference genome based on GRCh37. `smallGRCh37` is the default genome for the testing profile (`-profile testing`).
99+
Use `--genome smallGRCh37` to map against a small reference genome based on GRCh37.
100+
`smallGRCh37` is the default genome for the testing profile (`-profile testing`).
91101

92102
## AWS iGenomes
93103
Sarek is using [AWS iGenomes](https://ewels.github.io/AWS-iGenomes/), which facilitate storing and sharing references.
94-
Both `GRCh37` and `GRCh38` are available with `--genome iGRCh37` or `--genome iGRCh38` respectively, it contains all data previously detailed.
104+
Both `GRCh37` and `GRCh38` are available with `--genome GRCh37` or `--genome GRCh38` respectively, with any profile that uses the `conf/igenomes.config` file (e.g. `awsbatch`); you can also specify it explicitly with `-c conf/igenomes.config`. It contains all the data previously detailed.
95105

96106
## buildReferences.nf
97107

nextflow.config

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ profiles {
2828
// Singularity images need to be set up
2929
standard {
3030
includeConfig 'conf/base.config'
31+
includeConfig 'conf/genomes.config'
3132
includeConfig 'conf/uppmax-localhost.config'
3233
includeConfig 'conf/singularity-path.config'
3334
}
@@ -36,6 +37,7 @@ profiles {
3637
// Singularity images need to be set up
3738
slurm {
3839
includeConfig 'conf/base.config'
40+
includeConfig 'conf/genomes.config'
3941
includeConfig 'conf/uppmax-slurm.config'
4042
includeConfig 'conf/singularity-path.config'
4143
}
@@ -44,6 +46,7 @@ profiles {
4446
// Singularity images will be pulled automatically
4547
slurmDownload {
4648
includeConfig 'conf/base.config'
49+
includeConfig 'conf/genomes.config'
4750
includeConfig 'conf/uppmax-slurm.config'
4851
includeConfig 'conf/singularity.config'
4952
includeConfig 'conf/containers.config'
@@ -52,6 +55,7 @@ profiles {
5255
// Docker images will be pulled automatically
5356
docker {
5457
includeConfig 'conf/base.config'
58+
includeConfig 'conf/genomes.config'
5559
includeConfig 'conf/travis.config'
5660
includeConfig 'conf/docker.config'
5761
includeConfig 'conf/containers.config'
@@ -60,6 +64,7 @@ profiles {
6064
// Docker images will be pulled automatically
6165
awsbatch {
6266
includeConfig 'conf/base.config'
67+
includeConfig 'conf/igenomes.config'
6368
includeConfig 'conf/aws-batch.config'
6469
includeConfig 'conf/docker.config'
6570
includeConfig 'conf/containers.config'
@@ -68,6 +73,7 @@ profiles {
6873
// Singularity images will be pulled automatically
6974
singularity {
7075
includeConfig 'conf/base.config'
76+
includeConfig 'conf/genomes.config'
7177
includeConfig 'conf/travis.config'
7278
includeConfig 'conf/singularity.config'
7379
includeConfig 'conf/containers.config'
@@ -76,6 +82,7 @@ profiles {
7682
// Singularity images need to be set up
7783
singularityPath {
7884
includeConfig 'conf/base.config'
85+
includeConfig 'conf/genomes.config'
7986
includeConfig 'conf/travis.config'
8087
includeConfig 'conf/singularity-path.config'
8188
}
@@ -85,14 +92,16 @@ profiles {
8592
// Singularity images will be pulled automatically
8693
binac {
8794
includeConfig 'conf/base.config'
95+
includeConfig 'conf/genomes.config'
8896
includeConfig 'conf/binac.config'
8997
includeConfig 'conf/singularity.config'
9098
includeConfig 'conf/resources.config'
9199
includeConfig 'conf/containers.config'
92100
}
93-
// Default config for CFC cluster in Tuebingen/Germany
101+
// Default config for CFC cluster in Tuebingen/Germany
94102
cfc {
95103
includeConfig 'conf/base.config'
104+
includeConfig 'conf/genomes.config'
96105
includeConfig 'conf/cfc.config'
97106
includeConfig 'conf/singularity.config'
98107
includeConfig 'conf/resources.config'
@@ -132,4 +141,4 @@ def check_max(obj, type) {
132141
return obj
133142
}
134143
}
135-
}
144+
}

0 commit comments

Comments
 (0)