Skip to content
This repository was archived by the owner on Jan 27, 2020. It is now read-only.

Commit 1901ccd

Browse files
authored
Merge branch 'dev' into BTB
2 parents 804f3ad + e8469c6 commit 1901ccd

9 files changed

Lines changed: 84 additions & 17 deletions

File tree

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,11 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
2424
- [#671](https://github.com/SciLifeLab/Sarek/pull/671) - publishDir modes are now params
2525
- [#677](https://github.com/SciLifeLab/Sarek/pull/677), [#698](https://github.com/SciLifeLab/Sarek/pull/698) - Update docs
2626
- [#679](https://github.com/SciLifeLab/Sarek/pull/679) - Update old awsbatch configuration
27+
- [#682](https://github.com/SciLifeLab/Sarek/pull/682) - Specifications for memory and cpus for awsbatch
2728
- [#693](https://github.com/SciLifeLab/Sarek/pull/693) - Qualimap bamQC is now run after mapping and after recalibration for better QC
2829
- [#700](https://github.com/SciLifeLab/Sarek/pull/700) - Update GATK to `4.0.9.0`
2930
- [#702](https://github.com/SciLifeLab/Sarek/pull/702) - Update FastQC to `0.11.8`
31+
- [#705](https://github.com/SciLifeLab/Sarek/pull/705) - Replace `--TMP_DIR` with `--tmp-dir` for GATK `4.0.9.0` BaseRecalibrator
3032

3133
### `Fixed`
3234

@@ -35,6 +37,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
3537
- [#679](https://github.com/SciLifeLab/Sarek/pull/679) - Add publishDirMode for `germlineVC.nf`
3638
- [#700](https://github.com/SciLifeLab/Sarek/pull/700) - Fix [#699](https://github.com/SciLifeLab/Sarek/issues/699) missing DP in the FORMAT column VCFs for MuTect2
3739
- [#702](https://github.com/SciLifeLab/Sarek/pull/702) - Fix [#701](https://github.com/SciLifeLab/Sarek/issues/701)
40+
- [#705](https://github.com/SciLifeLab/Sarek/pull/705) - Fix [#704](https://github.com/SciLifeLab/Sarek/issues/704)
3841

3942
## [2.2.1] - 2018-10-04
4043

conf/aws-batch.config

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,21 @@
1010
params {
1111
genome_base = params.genome == 'GRCh37' ? "s3://ngi-igenomes/igenomes/Homo_sapiens/GATK/GRCh37" : params.genome == 'GRCh38' ? "s3://ngi-igenomes/igenomes/Homo_sapiens/GATK/GRCh38" : "s3://sarek-references/small"
1212
publishDirMode = 'copy'
13+
singleCPUMem = 7.GB // To make the uppmax slurm copy paste work.
14+
localReportDir = 'Reports'
1315
}
1416

1517
executor {
1618
name = 'awsbatch'
1719
awscli = '/home/ec2-user/miniconda/bin/aws'
1820
}
1921

22+
/* Rolling files are currently not supported on s3 */
23+
report.file = "${params.localReportDir}/Sarek_report.html"
24+
timeline.file = "${params.localReportDir}/Sarek_timeline.html"
25+
dag.file = "${params.localReportDir}/Sarek_DAG.svg"
26+
trace.file = "${params.localReportDir}/Sarek_trace.txt"
27+
2028
process {
2129
queue = params.awsqueue
2230

@@ -26,4 +34,29 @@ process {
2634
cpus = 2
2735
memory = 8.GB
2836

37+
withName:RunBcftoolsStats {
38+
cpus = 1
39+
memory = {params.singleCPUMem * 2} // Memory is doubled so that it won't run two on the same instance
40+
// Use a tiny queue for this one, so storage doesn't run out
41+
queue = params.awsqueue_tiny
42+
}
43+
withName:RunVcftools {
44+
cpus = 1
45+
memory = {params.singleCPUMem * 2} // Memory is doubled so that it won't run two on the same instance
46+
// Use a tiny queue for this one, so storage doesn't run out
47+
queue = params.awsqueue_tiny
48+
}
49+
withName:RunHaplotypecaller {
50+
cpus = 1
51+
// Fixed allocation here: memory does not scale with task.attempt
52+
memory = {params.singleCPUMem * 2} // Memory is doubled so that it won't run two on the same instance
53+
// Use a tiny queue for this one, so storage doesn't run out
54+
queue = params.awsqueue_tiny
55+
}
56+
withName:RunGenotypeGVCFs {
57+
cpus = 1
58+
memory = {params.singleCPUMem * 2} // Memory is doubled so that it won't run two on the same instance
59+
// Use a tiny queue for this one, so storage doesn't run out
60+
queue = params.awsqueue_tiny
61+
}
2962
}

conf/base.config

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ params {
3838
test = false // Not testing by default
3939
verbose = false // Enable for more verbose information
4040
awsqueue = false // Queue has to be provided when using awsbatch executor
41+
awsqueue_tiny = params.awsqueue // A separate queue with smaller instance types
42+
localReportDir = false // Used by AWS since reporting is not fully supported on s3 buckets
4143
}
4244

4345
process {
@@ -67,6 +69,6 @@ dag { // Turning on dag by default
6769

6870
trace { // Turning on trace tracking by default
6971
enabled = true
70-
fields = 'process,task_id,hash,name,attempt,status,exit,realtime,%cpu,vmem,rss,submit,start,complete,duration,realtime,rchar,wchar'
72+
fields = 'process,task_id,hash,name,attempt,status,exit,realtime,cpus,memory,%cpu,vmem,rss,submit,start,complete,duration,realtime,rchar,wchar'
7173
file = "${params.outDir}/Reports/Sarek_trace.txt"
7274
}

conf/resources.config

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -21,25 +21,27 @@ process {
2121

2222
withName:MapReads {
2323
memory = { check_max( 60.GB * task.attempt, 'memory' ) }
24-
cpus = { check_max( 10, 'cpus' ) }
24+
cpus = { check_max( 16, 'cpus' ) }
2525
}
2626
withName:CreateRecalibrationTable {
27-
cpus = { check_max( 12, 'cpus' ) }
28-
memory = {params.singleCPUMem * 8 * task.attempt}
27+
cpus = { check_max( 1, 'cpus' ) }
28+
memory = { check_max( 60.GB * task.attempt, 'memory') }
2929
}
3030
withName:MarkDuplicates {
31-
// Actually the -Xmx value should be kept lower
31+
// Actually the -Xmx value should be kept lower,
32+
// and is set through the markdup_java_options
3233
cpus = { check_max( 8, 'cpus' ) }
3334
memory = { check_max( 8.GB * task.attempt, 'memory' ) }
3435
}
3536
withName:MergeBams {
37+
cpus = { check_max( 4, 'cpus') }
3638
memory = {params.singleCPUMem * task.attempt}
3739
time = { check_max( 5.h * task.attempt, 'time' ) }
3840
}
3941
withName:RecalibrateBam {
40-
cpus = { check_max( 12, 'cpus' ) }
41-
memory = { check_max( 7.GB * 8 * task.attempt, 'memory' ) }
42-
time = { check_max( 5.h * task.attempt, 'time' ) }
42+
cpus = { check_max( 2, 'cpus' ) }
43+
memory = { check_max( 7.GB * 2 * task.attempt, 'memory' ) }
44+
time = { check_max( 10.h * task.attempt, 'time' ) }
4345
}
4446
withName:RunAlleleCount {
4547
cpus = { check_max( 1, 'cpus' ) }
@@ -49,6 +51,14 @@ process {
4951
cpus = { check_max( 1, 'cpus' ) }
5052
memory = { check_max( 14.GB * task.attempt, 'memory' ) }
5153
}
54+
withName:RunBamQCmapped {
55+
cpus = { check_max( 6, 'cpus' ) }
56+
memory = { check_max( 70.GB, 'memory' ) }
57+
}
58+
withName:RunBamQCrecalibrated {
59+
cpus = { check_max( 6, 'cpus' ) }
60+
memory = { check_max( 70.GB, 'memory' ) }
61+
}
5262
withName:RunBcftoolsStats {
5363
cpus = { check_max( 1, 'cpus' ) }
5464
}
@@ -65,13 +75,13 @@ process {
6575
memory = { check_max( 8.GB * task.attempt, 'memory' ) }
6676
}
6777
withName:RunHaplotypecaller {
68-
cpus = { check_max( 20, 'cpus' ) }
78+
cpus = { check_max( 1, 'cpus' ) }
6979
// Memory grows linearly with the attempt number (14 GB per attempt)
7080
memory = { check_max( 7.GB * 2 * task.attempt, 'memory' ) }
7181
time = { check_max( 5.h * task.attempt, 'time' ) }
7282
}
7383
withName:RunGenotypeGVCFs {
74-
cpus = { check_max( 20, 'cpus' ) }
84+
cpus = { check_max( 1, 'cpus' ) }
7585
memory = { check_max( 7.GB * task.attempt, 'memory' ) }
7686
}
7787
withName:RunMultiQC {
@@ -86,20 +96,24 @@ process {
8696
cpus = { check_max( 2, 'cpus' ) }
8797
time = { check_max( 5.h * task.attempt, 'time' ) }
8898
}
99+
withName:RunSingleManta {
100+
cpus = { check_max( 20, 'cpus' ) }
101+
memory = { check_max( 16.GB, 'memory') }
102+
}
89103
withName:RunSingleStrelka {
104+
cpus = { check_max( 20, 'cpus' ) }
105+
memory = { check_max( 16.GB, 'memory') }
90106
time = { check_max( 5.h * task.attempt, 'time' ) }
91107
}
92108
withName:RunSnpeff {
93109
cpus = { check_max( 1, 'cpus' ) }
94-
errorStrategy = { task.exitStatus == 143 ? 'retry' : 'ignore' }
95110
}
96111
withName:RunStrelka {
97112
cpus = { check_max( 1, 'cpus' ) }
98113
time = { check_max( 5.h * task.attempt, 'time' ) }
99114
}
100115
withName:RunVEP {
101-
cpus = { check_max( 1, 'cpus' ) }
116+
cpus = { check_max( 16, 'cpus' ) }
102117
memory = {check_max (32.GB * task.attempt, 'memory' ) }
103-
errorStrategy = { task.exitStatus == 143 ? 'retry' : 'ignore' }
104118
}
105-
}
119+
}

conf/uppmax-slurm.config

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ params {
1212
singleCPUMem = 7.GB // for processes that are using more memory but a single CPU only. Use the 'core' queue for these
1313
}
1414

15+
// Extended set of fields, e.g. native_id, cpus and memory:
16+
trace.fields = 'process,task_id,hash,name,native_id,attempt,status,exit,realtime,cpus,memory,%cpu,vmem,rss,submit,start,complete,duration,realtime,rchar,wchar'
17+
1518
process {
1619
clusterOptions = {"-A $params.project"}
1720
cpus = 16

docs/PARAMETERS.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,14 @@ So you can write `--tools mutect2,ascat` or `--tools MuTect2,ASCAT` without worr
6262

6363
Only required if you use the awsbatch profile. This parameter specifies the queue to which jobs are submitted in AWS Batch.
6464

65+
### --awsqueue_tiny `BatchQueueName`
66+
67+
Only used if you use the awsbatch profile. This parameter specifies a queue used for certain small jobs that might still require a significant amount of disk storage.
68+
69+
### --localReportDir `Directory`
70+
71+
Only used if you use the awsbatch profile. This parameter specifies a local output directory for Nextflow reports, such as Sarek_timeline.html, because writing these reports directly to S3 is currently not fully supported.
72+
6573
### --verbose
6674

6775
Display more information about files being processed.

lib/SarekUtils.groovy

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ class SarekUtils {
3636
'annotateTools',
3737
'annotateVCF',
3838
'awsqueue',
39+
'awsqueue_tiny',
3940
'build',
4041
'call-name',
4142
'callName',
@@ -52,6 +53,8 @@ class SarekUtils {
5253
'genome',
5354
'genomes',
5455
'help',
56+
'localReportDir',
57+
'local-report-dir',
5558
'markdup_java_options',
5659
'max_cpus',
5760
'max_memory',

main.nf

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,7 @@ process CreateRecalibrationTable {
352352
BaseRecalibrator \
353353
--input ${bam} \
354354
--output ${idSample}.recal.table \
355-
--TMP_DIR /tmp \
355+
--tmp-dir /tmp \
356356
-R ${genomeFile} \
357357
-L ${intervals} \
358358
--known-sites ${dbsnp} \
@@ -418,8 +418,8 @@ process RecalibrateBam {
418418
--input ${bam} \
419419
--output ${idSample}.recal.bam \
420420
-L ${intervals} \
421-
--create-output-bam-index true \
422-
--bqsr-recal-file ${recalibrationReport}
421+
--create-output-bam-index true \
422+
--bqsr-recal-file ${recalibrationReport}
423423
"""
424424
}
425425
// Creating a TSV file to restart from this step

nextflow.config

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ profiles {
3030
includeConfig 'conf/igenomes.config'
3131
includeConfig 'conf/aws-batch.config'
3232
includeConfig 'conf/docker.config'
33+
includeConfig 'conf/resources.config'
3334
includeConfig 'conf/containers.config'
3435
}
3536
// Default config for German BinAC cluster

0 commit comments

Comments
 (0)