Skip to content

Commit f4cc3b8

Browse files
author
Szilveszter Juhos
committed
Merge remote-tracking branch 'upstream/master'
2 parents efe0001 + 1b1e28f commit f4cc3b8

9 files changed

Lines changed: 175 additions & 300 deletions

File tree

annotate.nf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,7 @@ def sarekMessage() {
351351

352352
def startMessage() {
353353
// Display start message: the Sarek ASCII banner first, then the output of sarekMessage() and minimalInformationMessage()
354+
SarekUtils.sarek_ascii()
354355
this.sarekMessage()
355356
this.minimalInformationMessage()
356357
}

buildContainers.nf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,7 @@ def sarekMessage() {
248248

249249
def startMessage() {
250250
// Display start message: the Sarek ASCII banner first, then the output of sarekMessage() and minimalInformationMessage()
251+
SarekUtils.sarek_ascii()
251252
this.sarekMessage()
252253
this.minimalInformationMessage()
253254
}

buildReferences.nf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,7 @@ def sarekMessage() {
296296

297297
def startMessage() {
298298
// Display start message: the Sarek ASCII banner first, then the output of sarekMessage() and minimalInformationMessage()
299+
SarekUtils.sarek_ascii()
299300
this.sarekMessage()
300301
this.minimalInformationMessage()
301302
}

germlineVC.nf

Lines changed: 5 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,8 @@ directoryMap = SarekUtils.defineDirectoryMap(params.outDir)
6565
referenceMap = defineReferenceMap()
6666
toolList = defineToolList()
6767

68-
if (!checkReferenceMap(referenceMap)) exit 1, 'Missing Reference file(s), see --help for more information'
69-
if (!checkParameterList(tools,toolList)) exit 1, 'Unknown tool(s), see --help for more information'
68+
if (!SarekUtils.checkReferenceMap(referenceMap)) exit 1, 'Missing Reference file(s), see --help for more information'
69+
if (!SarekUtils.checkParameterList(tools,toolList)) exit 1, 'Unknown tool(s), see --help for more information'
7070

7171
if (params.test && params.genome in ['GRCh37', 'GRCh38']) {
7272
referenceMap.intervals = file("$workflow.projectDir/repeats/tiny_${params.genome}.list")
@@ -88,11 +88,9 @@ else tsvPath = "${directoryMap.recalibrated}/recalibrated.tsv"
8888
bamFiles = Channel.empty()
8989
if (tsvPath) {
9090
tsvFile = file(tsvPath)
91-
bamFiles = extractBams(tsvFile)
91+
bamFiles = SarekUtils.extractBams(tsvFile, "germline")
9292
} else exit 1, 'No sample were defined, see --help'
9393

94-
(patientGenders, bamFiles) = extractGenders(bamFiles)
95-
9694
/*
9795
================================================================================
9896
= P R O C E S S E S =
@@ -273,9 +271,7 @@ recalTables = recalTables
273271
[patient, sample, bam, bai, intervalBed, recalTable] }
274272

275273
// re-associate the BAMs and samples with the recalibration table
276-
bamsForHC = bamsForHC
277-
.phase(recalTables) { it[0..4] }
278-
.map { it1, it2 -> it1 + [it2[6]] }
274+
bamsForHC = bamsForHC.join(recalTables, by:[0,1,2,3,4])
279275

280276
bamsAll = bamsNormal.combine(bamsTumor)
281277

@@ -643,50 +639,11 @@ process GetVersionVCFtools {
643639
================================================================================
644640
*/
645641

646-
def checkFileExtension(it, extension) {
647-
// Check file extension
648-
if (!it.toString().toLowerCase().endsWith(extension.toLowerCase())) exit 1, "File: ${it} has the wrong extension: ${extension} see --help for more information"
649-
}
650-
651-
def checkParameterExistence(it, list) {
652-
// Check parameter existence
653-
if (!list.contains(it)) {
654-
println("Unknown parameter: ${it}")
655-
return false
656-
}
657-
return true
658-
}
659-
660-
def checkParameterList(list, realList) {
661-
// Loop through all parameters to check their existence and spelling
662-
return list.every{ checkParameterExistence(it, realList) }
663-
}
664-
665642
// Copy the value stored under `item` for the selected genome (params.genomes[params.genome])
// into params.<item>, then return that value wrapped by file()
def checkParamReturnFile(item) {
666643
params."${item}" = params.genomes[params.genome]."${item}"
667644
return file(params."${item}")
668645
}
669646

670-
def checkReferenceMap(referenceMap) {
671-
// Loop through all the references files to check their existence
672-
referenceMap.every {
673-
referenceFile, fileToCheck ->
674-
checkRefExistence(referenceFile, fileToCheck)
675-
}
676-
}
677-
678-
def checkRefExistence(referenceFile, fileToCheck) {
679-
if (fileToCheck instanceof List) return fileToCheck.every{ checkRefExistence(referenceFile, it) }
680-
def f = file(fileToCheck)
681-
// this is an expanded wildcard: we can assume all files exist
682-
if (f instanceof List && f.size() > 0) return true
683-
else if (!f.exists()) {
684-
log.info "Missing references: ${referenceFile} ${fileToCheck}"
685-
return false
686-
}
687-
return true
688-
}
689-
690647
def checkUppmaxProject() {
691648
// check if UPPMAX project number is specified
692649
return !(workflow.profile == 'slurm' && !params.project)
@@ -720,39 +677,6 @@ def defineToolList() {
720677
]
721678
}
722679

723-
def extractBams(tsvFile) {
724-
// Channeling the TSV file containing BAM.
725-
// Format is: "subject gender status sample bam bai"
726-
Channel
727-
.from(tsvFile.readLines())
728-
.map{line ->
729-
def list = returnTSV(line.split(),6)
730-
def idPatient = list[0]
731-
def gender = list[1]
732-
def status = returnStatus(list[2].toInteger())
733-
def idSample = list[3]
734-
def bamFile = returnFile(list[4])
735-
def baiFile = returnFile(list[5])
736-
737-
checkFileExtension(bamFile,".bam")
738-
checkFileExtension(baiFile,".bai")
739-
740-
[ idPatient, gender, status, idSample, bamFile, baiFile ]
741-
}
742-
}
743-
744-
def extractGenders(channel) {
745-
def genders = [:] // an empty map
746-
channel = channel.map{ it ->
747-
def idPatient = it[0]
748-
def gender = it[1]
749-
genders[idPatient] = gender
750-
751-
[idPatient] + it[2..-1]
752-
}
753-
[genders, channel]
754-
}
755-
756680
def generateIntervalsForVC(bams, intervals) {
757681
def (bamsNew, bamsForVC) = bams.into(2)
758682
def (intervalsNew, vcIntervals) = intervals.into(2)
@@ -826,34 +750,14 @@ def nextflowMessage() {
826750
log.info "N E X T F L O W ~ version ${workflow.nextflow.version} ${workflow.nextflow.build}"
827751
}
828752

829-
def returnFile(it) {
830-
// return file if it exists
831-
if (!file(it).exists()) exit 1, "Missing file in TSV file: ${it}, see --help for more information"
832-
return file(it)
833-
}
834-
835-
def returnStatus(it) {
836-
// Return status if it's correct
837-
// Status should be only 0 or 1
838-
// 0 being normal
839-
// 1 being tumor (or relapse or anything that is not normal...)
840-
if (!(it in [0, 1])) exit 1, "Status is not recognized in TSV file: ${it}, see --help for more information"
841-
return it
842-
}
843-
844-
def returnTSV(it, number) {
845-
// return TSV if it has the correct number of items in row
846-
if (it.size() != number) exit 1, "Malformed row in TSV file: ${it}, see --help for more information"
847-
return it
848-
}
849-
850753
def sarekMessage() {
851754
// Display Sarek message: pipeline name, params.version, the value of grabRevision() and, when set, the workflow commit id in brackets
852755
log.info "Sarek - Workflow For Somatic And Germline Variations ~ ${params.version} - " + this.grabRevision() + (workflow.commitId ? " [${workflow.commitId}]" : "")
853756
}
854757

855758
def startMessage() {
856759
// Display start message: the Sarek ASCII banner first, then the output of sarekMessage() and minimalInformationMessage()
760+
SarekUtils.sarek_ascii()
857761
this.sarekMessage()
858762
this.minimalInformationMessage()
859763
}

lib/SarekUtils.groovy

Lines changed: 126 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,28 @@
1-
class MyUtils {
2-
// Check if params is in this given list
1+
import static nextflow.Nextflow.file
2+
import nextflow.Channel
3+
4+
class SarekUtils {
5+
6+
// Check file extension (case-insensitive) and abort the run when it does not match
//   it        : file or path whose name is tested, compared through toString()
//   extension : expected suffix, e.g. ".bam"
// Returns nothing useful; it either passes silently or terminates the run
static def checkFileExtension(it, extension) {
  def hasExpectedExtension = it.toString().toLowerCase().endsWith(extension.toLowerCase())
  // `exit` is a Nextflow script-level function and is not in scope inside this
  // plain Groovy class, so the static helper on nextflow.Nextflow is called explicitly
  if (!hasExpectedExtension) nextflow.Nextflow.exit(1, "File: ${it} has the wrong extension: ${extension} see --help for more information")
}
10+
11+
// Tell whether a single parameter belongs to the list of known values
// Prints a message naming the parameter when it does not
static def checkParameterExistence(it, list) {
  def known = list.contains(it)
  if (!known) println("Unknown parameter: ${it}")
  return known
}
19+
20+
// Validate every entry of `list` against `realList`
// Stops at the first unknown entry (which checkParameterExistence reports) and returns false;
// returns true when all entries are known
static def checkParameterList(list, realList) {
  for (candidate in list) {
    if (!checkParameterExistence(candidate, realList)) return false
  }
  return true
}
24+
25+
// Return element in list of allowed params
326
static def checkParams(it) {
427
return it in [
528
'annotate-tools',
@@ -68,15 +91,22 @@ class MyUtils {
6891
'version']
6992
}
7093

71-
// Loop through all parameters to check their existence and spelling
72-
static def checkParameterList(list, realList) {
73-
return list.every{ checkParameterExistence(it, realList) }
94+
// Check every entry of the reference map (reference name -> file or list of files)
// Returns false as soon as checkRefExistence rejects an entry, true otherwise
static def checkReferenceMap(referenceMap) {
  for (reference in referenceMap) {
    if (!SarekUtils.checkRefExistence(reference.key, reference.value)) return false
  }
  return true
}
75101

76-
// Check parameter existence
77-
static def checkParameterExistence(it, list) {
78-
if (!list.contains(it)) {
79-
println("Unknown parameter: ${it}")
102+
// Check that a reference file exists (for a list, every file in it); a missing file is logged
103+
static def checkRefExistence(referenceFile, fileToCheck) {
104+
if (fileToCheck instanceof List) return fileToCheck.every{ SarekUtils.checkRefExistence(referenceFile, it) }
105+
def f = file(fileToCheck)
106+
// this is an expanded wildcard: we can assume all files exist
107+
if (f instanceof List && f.size() > 0) return true
108+
else if (!f.exists()) {
109+
this.log.info "Missing references: ${referenceFile} ${fileToCheck}"
80110
return false
81111
}
82112
return true
@@ -85,32 +115,66 @@ class MyUtils {
85115
// Build the map of output directories, all rooted at outDir
// Keys are the short names used by the workflows, values the corresponding paths
static def defineDirectoryMap(outDir) {
  def directories = [:]

  // Preprocessing
  directories['nonRealigned']     = "${outDir}/Preprocessing/NonRealigned"
  directories['nonRecalibrated']  = "${outDir}/Preprocessing/NonRecalibrated"
  directories['recalibrated']     = "${outDir}/Preprocessing/Recalibrated"

  // Variant calling
  directories['ascat']            = "${outDir}/VariantCalling/Ascat"
  directories['freebayes']        = "${outDir}/VariantCalling/FreeBayes"
  directories['gvcf-hc']          = "${outDir}/VariantCalling/HaplotypeCallerGVCF"
  directories['haplotypecaller']  = "${outDir}/VariantCalling/HaplotypeCaller"
  directories['manta']            = "${outDir}/VariantCalling/Manta"
  directories['mutect1']          = "${outDir}/VariantCalling/MuTect1"
  directories['mutect2']          = "${outDir}/VariantCalling/MuTect2"
  directories['strelka']          = "${outDir}/VariantCalling/Strelka"
  directories['strelkabp']        = "${outDir}/VariantCalling/StrelkaBP"

  // Annotation
  directories['snpeff']           = "${outDir}/Annotation/SnpEff"
  directories['vep']              = "${outDir}/Annotation/VEP"

  // Reports
  directories['bamQC']            = "${outDir}/Reports/bamQC"
  directories['bcftoolsStats']    = "${outDir}/Reports/BCFToolsStats"
  directories['fastQC']           = "${outDir}/Reports/FastQC"
  directories['markDuplicatesQC'] = "${outDir}/Reports/MarkDuplicates"
  directories['multiQC']          = "${outDir}/Reports/MultiQC"
  directories['samtoolsStats']    = "${outDir}/Reports/SamToolsStats"
  directories['snpeffReports']    = "${outDir}/Reports/SnpEff"
  directories['vcftools']         = "${outDir}/Reports/VCFTools"
  directories['version']          = "${outDir}/Reports/ToolsVersion"

  return directories
}
113143

144+
// Channeling the TSV file containing BAM.
// Format is: "subject gender status sample bam bai"
//   tsvFile : object exposing readLines(), one TSV row per line
//   mode    : "germline" drops the gender column from the emitted rows,
//             any other value keeps it
// Returns a channel emitting one list per row:
//   germline  -> [ idPatient, status, idSample, bamFile, baiFile ]
//   otherwise -> [ idPatient, gender, status, idSample, bamFile, baiFile ]
static def extractBams(tsvFile, mode) {
  Channel
    .from(tsvFile.readLines())
    // ignore blank lines (e.g. a trailing newline) instead of rejecting them as malformed rows
    .filter{ line -> !line.trim().isEmpty() }
    .map{ line ->
      def row       = SarekUtils.returnTSV(line.split(), 6)
      def idPatient = row[0]
      def gender    = row[1]
      def rawStatus = row[2]
      // a non-numeric status is handed to returnStatus unchanged so that it is reported
      // as an unrecognized status instead of surfacing as a NumberFormatException
      def status    = SarekUtils.returnStatus(rawStatus.isInteger() ? rawStatus.toInteger() : rawStatus)
      def idSample  = row[3]
      def bamFile   = SarekUtils.returnFile(row[4])
      def baiFile   = SarekUtils.returnFile(row[5])

      SarekUtils.checkFileExtension(bamFile, ".bam")
      SarekUtils.checkFileExtension(baiFile, ".bai")

      if (mode == "germline") return [ idPatient, status, idSample, bamFile, baiFile ]
      return [ idPatient, gender, status, idSample, bamFile, baiFile ]
    }
}
165+
166+
// Split the gender column away from the rows of a channel, as it's only used for CNVs
// Each incoming row is expected to start with [idPatient, gender, ...]
// Returns [genders, channel]:
//   genders : map idPatient -> gender, filled inside the map closure as rows pass through
//   channel : the mapped channel, whose rows no longer carry the gender column
// NOTE(review): with an asynchronous channel the map is presumably still empty
// right after this call returns - confirm before reading it eagerly
static def extractGenders(channel) {
  def genderByPatient = [:]
  def withoutGender = channel.map{ row ->
    genderByPatient[row[0]] = row[1]
    return [row[0]] + row[2..-1]
  }
  return [genderByPatient, withoutGender]
}
177+
114178
// Compare params to list of verified params
115179
static def isAllowedParams(params) {
116180
final test = true
@@ -122,4 +186,34 @@ class MyUtils {
122186
}
123187
return test
124188
}
189+
190+
// Return file if it exists, abort the run otherwise
//   it : path taken from a TSV row
static def returnFile(it) {
  // resolve the path once and reuse the result for both the check and the return value
  def resolved = file(it)
  // `exit` is a Nextflow script-level function and is not in scope inside this
  // plain Groovy class, so the static helper on nextflow.Nextflow is called explicitly
  if (!resolved.exists()) nextflow.Nextflow.exit(1, "Missing file in TSV file: ${it}, see --help for more information")
  return resolved
}
195+
196+
// Return status [0,1], abort the run for any other value
// 0 == Normal, 1 == Tumor
static def returnStatus(it) {
  // `exit` is a Nextflow script-level function and is not in scope inside this
  // plain Groovy class, so the static helper on nextflow.Nextflow is called explicitly
  if (!(it in [0, 1])) nextflow.Nextflow.exit(1, "Status is not recognized in TSV file: ${it}, see --help for more information")
  return it
}
202+
203+
// Return TSV if it has the correct number of items in row, abort the run otherwise
//   it     : the split row (anything exposing size())
//   number : expected number of items
static def returnTSV(it, number) {
  // `exit` is a Nextflow script-level function and is not in scope inside this
  // plain Groovy class, so the static helper on nextflow.Nextflow is called explicitly
  if (it.size() != number) nextflow.Nextflow.exit(1, "Malformed row in TSV file: ${it}, see --help for more information")
  return it
}
208+
209+
// Sarek ascii art: print the banner to standard output, one line at a time
static def sarek_ascii() {
  def banner = [
    " ____ _____ _ ",
    " .' _ `. / ____| | | ",
    " / |\\`-_ \\ | (___ __ _ _ __ ___| | __ ",
    "| | \\ `-| \\___ \\ / _` | '__/ __| |/ / ",
    " \\ | \\ / ____) | (_| | | | __| < ",
    " `|____\\' |_____/ \\__,_|_| \\___|_|\\_\\ "
  ]
  for (bannerLine in banner) {
    println bannerLine
  }
}
218+
125219
}

0 commit comments

Comments
 (0)