Skip to content

Commit dc064f1

Browse files
authored
Merge pull request #620 from maxulysse/dev_samplesheet_check
Improve samplesheet check + gender -> sex
2 parents aa88368 + 1d94868 commit dc064f1

43 files changed

Lines changed: 153 additions & 140 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
3434
- [#597](https://github.com/nf-core/sarek/pull/597) - Added tiddit for tumor variant calling
3535
- [#600](https://github.com/nf-core/sarek/pull/600) - Added description for UMI related params in schema
3636
- [#604](https://github.com/nf-core/sarek/pull/604), [#617](https://github.com/nf-core/sarek/pull/617) - Added full size tests WGS 30x NA12878
37+
- [#620](https://github.com/nf-core/sarek/pull/620) - Added checks for sex information when running a CNV tool
3738

3839
### Changed
3940

@@ -73,6 +74,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7374
- [#600](https://github.com/nf-core/sarek/pull/600) - Remove `TODO` in awsfulltest
7475
- [#606](https://github.com/nf-core/sarek/pull/606) - Updated `ASCAT` to version `3.0` as module
7576
- [#608](https://github.com/nf-core/sarek/pull/608) - Prevent candidate VCFs from getting published in manta
77+
- [#620](https://github.com/nf-core/sarek/pull/620) - `gender` is now `sex` in the samplesheet
7678

7779
### Fixed
7880

assets/schema_input.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@
1717
"pattern": "^\\S+$",
1818
"errorMessage": "Sample name must be provided and cannot contain spaces"
1919
},
20-
"gender": {
21-
"errorMessage": "Gender cannot contain spaces",
20+
"sex": {
21+
"errorMessage": "Sex cannot contain spaces",
2222
"anyOf": [
2323
{
2424
"type": "string",

conf/modules.config

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -743,7 +743,7 @@ process{
743743
ploidy: params.ploidy,
744744
printNA: params.wes ? "FALSE" : "TRUE",
745745
readcountthreshold: params.wes ? "50" : "10",
746-
sex: meta.gender,
746+
sex: meta.sex,
747747
//uniquematch: not set
748748
window: params.cf_window ?: ""
749749
],
@@ -926,12 +926,12 @@ process{
926926
withName: 'ASCAT' {
927927

928928
ext.args = {[
929-
"gender": meta.gender,
929+
"gender": meta.sex,
930930
"genomeVersion": params.ascat_genome,
931931
"purity": params.ascat_purity,
932932
"ploidy": params.ploidy,
933933
"minCounts": params.ascat_min_counts,
934-
"chrom_names": meta.gender == 'XX' ? params.ascat_chromosomes : "c(1:22, 'X', 'Y')",
934+
"chrom_names": meta.sex == 'XX' ? params.ascat_chromosomes : "c(1:22, 'X', 'Y')",
935935
"min_base_qual": params.ascat_min_base_qual,
936936
"min_map_qual": params.ascat_min_map_qual
937937
]}
@@ -1002,7 +1002,7 @@ process{
10021002
ploidy: params.ploidy,
10031003
printNA: params.wes ? "FALSE" : "TRUE",
10041004
readcountthreshold: params.wes ? "50" : "10",
1005-
sex: meta.gender,
1005+
sex: meta.sex,
10061006
//uniquematch: not set
10071007
window: params.cf_window ?: ""
10081008
],

docs/usage.md

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ Multiple `CSV` files can be specified if the path is enclosed in quotes.
5353
| Column | Description |
5454
| --------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
5555
| `patient` | **Custom patient ID**; designates the patient/subject; must be unique for each patient, but one patient can have multiple samples (e.g. normal and tumor). |
56-
| `gender` | **Sex chromosomes of the patient**; i.e. XX, XY..., only used for Copy-Number Variation analysis in a tumor/pair<br /> _Optional, Default: `NA`_ |
56+
| `sex` | **Sex chromosomes of the patient**; e.g. XX, XY..., only used for Copy-Number Variation analysis in a tumor/pair<br /> _Optional, Default: `NA`_ |
5757
| `status` | **Normal/tumor status of sample**; can be `0` (normal) or `1` (tumor).<br /> _Optional, Default: `0`_ |
5858
| `sample` | **Custom sample ID** for each tumor and normal sample; more than one tumor sample for each subject is possible, i.e. a tumor and a relapse; samples can have multiple lanes for which the _same_ ID must be used to merge them later (see also `lane`). Sample IDs must be unique for unique biological samples |
5959
| `lane` | Lane ID, used when the `sample` is multiplexed on several lanes. Must be unique for each lane in the same sample (but does not need to be the original lane name), and must contain at least one character <br /> _Required for `--step_mapping`_ |
@@ -104,10 +104,10 @@ patient1,test_sample,3,test_L003.bam
104104

105105
##### Full samplesheet
106106

107-
In this example, all possible columns are used. There are 3 read groups for the normal sample, 2 for the tumor sample, 1 for the relapse, including the `gender` and `status` information per patient:
107+
In this example, all possible columns are used. There are 3 read groups for the normal sample, 2 for the tumor sample, 1 for the relapse, including the `sex` and `status` information per patient:
108108

109109
```console
110-
patient,gender,status,sample,lane,fastq_1,fastq_2
110+
patient,sex,status,sample,lane,fastq_1,fastq_2
111111
patient1,XX,0,normal_sample,lane_1,test_L001_1.fastq.gz,test_L001_2.fastq.gz
112112
patient1,XX,0,normal_sample,lane_2,test_L002_1.fastq.gz,test_L002_2.fastq.gz
113113
patient1,XX,0,normal_sample,lane_3,test_L003_1.fastq.gz,test_L003_2.fastq.gz
@@ -117,7 +117,7 @@ patient1,XX,1,relapse_sample,lane_1,test3_L001_1.fastq.gz,test3_L001_2.fastq.gz
117117
```
118118

119119
```console
120-
patient,gender,status,sample,lane,bam
120+
patient,sex,status,sample,lane,bam
121121
patient1,XX,0,normal_sample,lane_1,test_L001.bam
122122
patient1,XX,0,normal_sample,lane_2,test_L002.bam
123123
patient1,XX,0,normal_sample,lane_3,test_L003.bam
@@ -148,17 +148,17 @@ The `Sarek`-generated `CSV` file is stored under `results/csv/mapped.csv` if in
148148

149149
##### Full samplesheet
150150

151-
In this example, all possible columns are used including the `gender` and `status` information per patient:
151+
In this example, all possible columns are used including the `sex` and `status` information per patient:
152152

153153
```console
154-
patient,gender,status,sample,bam,bai
154+
patient,sex,status,sample,bam,bai
155155
patient1,XX,0,test_sample,test_mapped.bam,test_mapped.bam.bai
156156
patient1,XX,1,tumor_sample,test2_mapped.bam,test2_mapped.bam.bai
157157
patient1,XX,1,relapse_sample,test3_mapped.bam,test3_mapped.bam.bai
158158
```
159159

160160
```console
161-
patient,gender,status,sample,cram,crai
161+
patient,sex,status,sample,cram,crai
162162
patient1,XX,0,normal_sample,test_mapped.cram,test_mapped.cram.crai
163163
patient1,XX,1,tumor_sample,test2_mapped.cram,test2_mapped.cram.crai
164164
patient1,XX,1,relapse_sample,test3_mapped.cram,test3_mapped.cram.crai
@@ -184,17 +184,17 @@ The `Sarek`-generated `CSV` file is stored under `results/csv/markduplicates_no_
184184

185185
##### Full samplesheet
186186

187-
In this example, all possible columns are used including the `gender` and `status` information per patient:
187+
In this example, all possible columns are used including the `sex` and `status` information per patient:
188188

189189
```console
190-
patient,gender,status,sample,bam,bai
190+
patient,sex,status,sample,bam,bai
191191
patient1,XX,0,test_sample,test_md.bam,test_md.bam.bai
192192
patient1,XX,1,tumor_sample,test2_md.bam,test2_md.bam.bai
193193
patient1,XX,1,relapse_sample,test3_md.bam,test3_md.bam.bai
194194
```
195195

196196
```console
197-
patient,gender,status,sample,cram,crai
197+
patient,sex,status,sample,cram,crai
198198
patient1,XX,0,normal_sample,test_md.cram,test_md.cram.crai
199199
patient1,XX,1,tumor_sample,test2_md.cram,test2_md.cram.crai
200200
patient1,XX,1,relapse_sample,test3_md.cram,test3_md.cram.crai
@@ -220,10 +220,10 @@ The `Sarek`-generated `CSV` file is stored under `results/csv/markduplicates.csv
220220

221221
##### Full samplesheet
222222

223-
In this example, all possible columns are used including the `gender` and `status` information per patient:
223+
In this example, all possible columns are used including the `sex` and `status` information per patient:
224224

225225
```console
226-
patient,gender,status,sample,cram,crai,table
226+
patient,sex,status,sample,cram,crai,table
227227
patient1,XX,0,test_sample,test_mapped.cram,test_mapped.cram.crai,test.table
228228
patient1,XX,1,tumor_sample,test2_mapped.cram,test2_mapped.cram.crai,test2.table
229229
patient1,XX,1,relapse_sample,test3_mapped.cram,test3_mapped.cram.crai,test3.table
@@ -249,10 +249,10 @@ The `Sarek`-generated `CSV` file is stored under `results/csv/recalibrated.csv`
249249

250250
##### Full samplesheet
251251

252-
In this example, all possible columns are used including the `gender` and `status` information per patient:
252+
In this example, all possible columns are used including the `sex` and `status` information per patient:
253253

254254
```console
255-
patient,gender,status,sample,cram,crai
255+
patient,sex,status,sample,cram,crai
256256
patient1,XX,0,normal_sample,test_mapped.cram,test_mapped.cram.crai
257257
patient1,XX,1,tumor_sample,test2_mapped.cram,test2_mapped.cram.crai
258258
patient1,XX,1,relapse_sample,test3_mapped.cram,test3_mapped.cram.crai

subworkflows/local/germline_variant_calling.nf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ workflow GERMLINE_VARIANT_CALLING {
4949
//If no interval file provided (0) then add empty list
5050
intervals_new = num_intervals == 0 ? [] : intervals
5151

52-
[[patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:meta.sample, data_type:meta.data_type, num_intervals:num_intervals],
52+
[[patient:meta.patient, sample:meta.sample, sex:meta.sex, status:meta.status, id:meta.sample, data_type:meta.data_type, num_intervals:num_intervals],
5353
cram, crai, intervals_new]
5454
}
5555

@@ -61,7 +61,7 @@ workflow GERMLINE_VARIANT_CALLING {
6161
bed_new = num_intervals == 0 ? [] : bed_tbi[0]
6262
tbi_new = num_intervals == 0 ? [] : bed_tbi[1]
6363

64-
[[patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:meta.sample, data_type:meta.data_type, num_intervals:num_intervals],
64+
[[patient:meta.patient, sample:meta.sample, sex:meta.sex, status:meta.status, id:meta.sample, data_type:meta.data_type, num_intervals:num_intervals],
6565
cram, crai, bed_new, tbi_new]
6666
}
6767

subworkflows/local/mapping_csv.nf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,10 @@ workflow MAPPING_CSV {
1111
bam_indexed.collectFile(keepHeader: true, skip: 1, sort: true, storeDir: "${params.outdir}/csv") { meta, bam, bai ->
1212
patient = meta.patient
1313
sample = meta.sample
14-
gender = meta.gender
14+
sex = meta.sex
1515
status = meta.status
1616
bam = "${params.outdir}/preprocessing/${sample}/mapped/${bam.name}"
1717
bai = "${params.outdir}/preprocessing/${sample}/mapped/${bai.name}"
18-
["mapped.csv", "patient,gender,status,sample,bam,bai\n${patient},${gender},${status},${sample},${bam},${bai}\n"]
18+
["mapped.csv", "patient,sex,status,sample,bam,bai\n${patient},${sex},${status},${sample},${bam},${bai}\n"]
1919
}
2020
}

subworkflows/local/markduplicates_csv.nf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,12 @@ workflow MARKDUPLICATES_CSV {
1111
cram_markduplicates.collectFile(keepHeader: true, skip: 1, sort: true, storeDir: "${params.outdir}/csv") { meta, file, index ->
1212
patient = meta.patient
1313
sample = meta.sample
14-
gender = meta.gender
14+
sex = meta.sex
1515
status = meta.status
1616
suffix_aligned = params.save_output_as_bam ? "bam" : "cram"
1717
suffix_index = params.save_output_as_bam ? "bam.bai" : "cram.crai"
1818
file = "${params.outdir}/preprocessing/${sample}/markduplicates/${file.baseName}.${suffix_aligned}"
1919
index = "${params.outdir}/preprocessing/${sample}/markduplicates/${index.baseName.minus(".cram")}.${suffix_index}"
20-
["markduplicates_no_table.csv", "patient,gender,status,sample,cram,crai\n${patient},${gender},${status},${sample},${file},${index}\n"]
20+
["markduplicates_no_table.csv", "patient,sex,status,sample,cram,crai\n${patient},${sex},${status},${sample},${file},${index}\n"]
2121
}
2222
}

subworkflows/local/pair_variant_calling.nf

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ workflow PAIR_VARIANT_CALLING {
5858
//If no interval file provided (0) then add empty list
5959
intervals_new = num_intervals == 0 ? [] : intervals
6060

61-
[[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id: meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:num_intervals],
61+
[[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, sex:meta.sex, id: meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:num_intervals],
6262
normal_cram, normal_crai, tumor_cram, tumor_crai, intervals_new]
6363
}
6464

@@ -70,7 +70,7 @@ workflow PAIR_VARIANT_CALLING {
7070
bed_new = num_intervals == 0 ? [] : bed_tbi[0]
7171
tbi_new = num_intervals == 0 ? [] : bed_tbi[1]
7272

73-
[[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id: meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:num_intervals],
73+
[[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, sex:meta.sex, id: meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:num_intervals],
7474
normal_cram, normal_crai, tumor_cram, tumor_crai, bed_new, tbi_new]
7575

7676
}
@@ -168,7 +168,7 @@ workflow PAIR_VARIANT_CALLING {
168168
bed_new = num_intervals == 0 ? [] : bed_tbi[0]
169169
tbi_new = num_intervals == 0 ? [] : bed_tbi[1]
170170

171-
[[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:num_intervals],
171+
[[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, sex:meta.sex, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:num_intervals],
172172
normal_cram, normal_crai, tumor_cram, tumor_crai, vcf, vcf_tbi, bed_new, tbi_new]
173173
}
174174

subworkflows/local/prepare_recalibration_csv.nf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,13 @@ workflow PREPARE_RECALIBRATION_CSV {
1111
cram_table_bqsr.collectFile(keepHeader: true, skip: 1, sort: true, storeDir: "${params.outdir}/csv") { meta, cram, crai, table ->
1212
patient = meta.patient
1313
sample = meta.sample
14-
gender = meta.gender
14+
sex = meta.sex
1515
status = meta.status
1616
suffix_aligned = params.save_output_as_bam ? "bam" : "cram"
1717
suffix_index = params.save_output_as_bam ? "bam.bai" : "cram.crai"
1818
cram = "${params.outdir}/preprocessing/${sample}/markduplicates/${cram.baseName}.${suffix_aligned}"
1919
crai = "${params.outdir}/preprocessing/${sample}/markduplicates/${crai.baseName.minus(".cram")}.${suffix_index}"
2020
table = "${params.outdir}/preprocessing/${sample}/recal_table/${sample}.recal.table"
21-
["markduplicates.csv", "patient,gender,status,sample,cram,crai,table\n${patient},${gender},${status},${sample},${cram},${crai},${table}\n"]
21+
["markduplicates.csv", "patient,sex,status,sample,cram,crai,table\n${patient},${sex},${status},${sample},${cram},${crai},${table}\n"]
2222
}
2323
}

subworkflows/local/recalibrate_csv.nf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,10 @@ workflow RECALIBRATE_CSV {
1111
cram_recalibrated_index.collectFile(keepHeader: true, skip: 1, sort: true, storeDir: "${params.outdir}/csv") { meta, file, index ->
1212
patient = meta.patient
1313
sample = meta.sample
14-
gender = meta.gender
14+
sex = meta.sex
1515
status = meta.status
1616
file = "${params.outdir}/preprocessing/${sample}/recalibrated/${file.name}"
1717
index = "${params.outdir}/preprocessing/${sample}/recalibrated/${index.name}"
18-
["recalibrated.csv", "patient,gender,status,sample,cram,crai\n${patient},${gender},${status},${sample},${file},${index}\n"]
18+
["recalibrated.csv", "patient,sex,status,sample,cram,crai\n${patient},${sex},${status},${sample},${file},${index}\n"]
1919
}
2020
}

0 commit comments

Comments
 (0)