nf-core · maxulysse · Jul 6, 2022 · Jul 5, 2022 · Jul 5, 2022 · Jul 5, 2022
@@ -34,6 +34,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - [#597](https://github.com/nf-core/sarek/pull/597) - Added tiddit for tumor variant calling
 - [#600](https://github.com/nf-core/sarek/pull/600) - Added description for UMI related params in schema
 - [#604](https://github.com/nf-core/sarek/pull/604), [#617](https://github.com/nf-core/sarek/pull/617) - Added full size tests WGS 30x NA12878
+- [#620](https://github.com/nf-core/sarek/pull/620) - Added checks for sex information when running CNV tools
 
 ### Changed
 
@@ -73,6 +74,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - [#600](https://github.com/nf-core/sarek/pull/600) - Remove `TODO` in awsfulltest
 - [#606](https://github.com/nf-core/sarek/pull/606) - Updated `ASCAT` to version `3.0` as module
 - [#608](https://github.com/nf-core/sarek/pull/608) - Prevent candidate VCFs from getting published in manta
+- [#620](https://github.com/nf-core/sarek/pull/620) - `gender` is now `sex` in the samplesheet
 
 ### Fixed
 

@@ -17,8 +17,8 @@
                 "pattern": "^\\S+$",
                 "errorMessage": "Sample name must be provided and cannot contain spaces"
             },
-            "gender": {
-                "errorMessage": "Gender cannot contain spaces",
+            "sex": {
+                "errorMessage": "Sex cannot contain spaces",
                 "anyOf": [
                     {
                         "type": "string",

@@ -743,7 +743,7 @@ process{
                         ploidy: params.ploidy,
                         printNA:  params.wes ? "FALSE" : "TRUE",
                         readcountthreshold:  params.wes ? "50" : "10",
-                        sex: meta.gender,
+                        sex: meta.sex,
                         //uniquematch: not set
                         window: params.cf_window ?: ""
                         ],
@@ -926,12 +926,12 @@ process{
     withName: 'ASCAT' {
 
         ext.args = {[
-            "gender": meta.gender,
+            "gender": meta.sex,
             "genomeVersion": params.ascat_genome,
             "purity": params.ascat_purity,
             "ploidy": params.ploidy,
             "minCounts": params.ascat_min_counts,
-            "chrom_names": meta.gender == 'XX' ? params.ascat_chromosomes : "c(1:22, 'X', 'Y')",
+            "chrom_names": meta.sex == 'XX' ? params.ascat_chromosomes : "c(1:22, 'X', 'Y')",
             "min_base_qual": params.ascat_min_base_qual,
             "min_map_qual": params.ascat_min_map_qual
             ]}
@@ -1002,7 +1002,7 @@ process{
                         ploidy: params.ploidy,
                         printNA:  params.wes ? "FALSE" : "TRUE",
                         readcountthreshold:  params.wes ? "50" : "10",
-                        sex: meta.gender,
+                        sex: meta.sex,
                         //uniquematch: not set
                         window: params.cf_window ?: ""
                         ],

@@ -53,7 +53,7 @@ Multiple `CSV` files can be specified if the path is enclosed in quotes.
 | Column    | Description                                                                                                                                                                                                                                                                                                     |
 | --------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `patient` | **Custom patient ID**; designates the patient/subject; must be unique for each patient, but one patient can have multiple samples (e.g. normal and tumor).                                                                                                                                                      |
-| `gender`  | **Sex chromosomes of the patient**; i.e. XX, XY..., only used for Copy-Number Variation analysis in a tumor/pair<br /> _Optional, Default: `NA`_                                                                                                                                                                |
+| `sex`     | **Sex chromosomes of the patient**; i.e. XX, XY..., only used for Copy-Number Variation analysis in a tumor/pair<br /> _Optional, Default: `NA`_                                                                                                                                                                |
 | `status`  | **Normal/tumor status of sample**; can be `0` (normal) or `1` (tumor).<br /> _Optional, Default: `0`_                                                                                                                                                                                                           |
 | `sample`  | **Custom sample ID** for each tumor and normal sample; more than one tumor sample for each subject is possible, i.e. a tumor and a relapse; samples can have multiple lanes for which the _same_ ID must be used to merge them later (see also `lane`). Sample IDs must be unique for unique biological samples |
 | `lane`    | Lane ID, used when the `sample` is multiplexed on several lanes. Must be unique for each lane in the same sample (but does not need to be the original lane name), and must contain at least one character <br /> _Required for `--step_mapping`_                                                               |
@@ -104,10 +104,10 @@ patient1,test_sample,3,test_L003.bam
 
 ##### Full samplesheet
 
-In this example, all possible columns are used. There are 3 read groups for the normal sample, 2 for the tumor sample, 1 for the relapse, including the `gender` and `status` information per patient:
+In this example, all possible columns are used. There are 3 read groups for the normal sample, 2 for the tumor sample, and 1 for the relapse sample; the `sex` and `status` information is included for each patient:
 
 ```console
-patient,gender,status,sample,lane,fastq_1,fastq_2
+patient,sex,status,sample,lane,fastq_1,fastq_2
 patient1,XX,0,normal_sample,lane_1,test_L001_1.fastq.gz,test_L001_2.fastq.gz
 patient1,XX,0,normal_sample,lane_2,test_L002_1.fastq.gz,test_L002_2.fastq.gz
 patient1,XX,0,normal_sample,lane_3,test_L003_1.fastq.gz,test_L003_2.fastq.gz
@@ -117,7 +117,7 @@ patient1,XX,1,relapse_sample,lane_1,test3_L001_1.fastq.gz,test3_L001_2.fastq.gz
 ```
 
 ```console
-patient,gender,status,sample,lane,bam
+patient,sex,status,sample,lane,bam
 patient1,XX,0,normal_sample,lane_1,test_L001.bam
 patient1,XX,0,normal_sample,lane_2,test_L002.bam
 patient1,XX,0,normal_sample,lane_3,test_L003.bam
@@ -148,17 +148,17 @@ The `Sarek`-generated `CSV` file is stored under `results/csv/mapped.csv` if in
 
 ##### Full samplesheet
 
-In this example, all possible columns are used including the `gender` and `status` information per patient:
+In this example, all possible columns are used, including the `sex` and `status` information per patient:
 
 ```console
-patient,gender,status,sample,bam,bai
+patient,sex,status,sample,bam,bai
 patient1,XX,0,test_sample,test_mapped.bam,test_mapped.bam.bai
 patient1,XX,1,tumor_sample,test2_mapped.bam,test2_mapped.bam.bai
 patient1,XX,1,relapse_sample,test3_mapped.bam,test3_mapped.bam.bai
 ```
 
 ```console
-patient,gender,status,sample,cram,crai
+patient,sex,status,sample,cram,crai
 patient1,XX,0,normal_sample,test_mapped.cram,test_mapped.cram.crai
 patient1,XX,1,tumor_sample,test2_mapped.cram,test2_mapped.cram.crai
 patient1,XX,1,relapse_sample,test3_mapped.cram,test3_mapped.cram.crai
@@ -184,17 +184,17 @@ The `Sarek`-generated `CSV` file is stored under `results/csv/markduplicates_no_
 
 ##### Full samplesheet
 
-In this example, all possible columns are used including the `gender` and `status` information per patient:
+In this example, all possible columns are used, including the `sex` and `status` information per patient:
 
 ```console
-patient,gender,status,sample,bam,bai
+patient,sex,status,sample,bam,bai
 patient1,XX,0,test_sample,test_md.bam,test_md.bam.bai
 patient1,XX,1,tumor_sample,test2_md.bam,test2_md.bam.bai
 patient1,XX,1,relapse_sample,test3_md.bam,test3_md.bam.bai
 ```
 
 ```console
-patient,gender,status,sample,cram,crai
+patient,sex,status,sample,cram,crai
 patient1,XX,0,normal_sample,test_md.cram,test_md.cram.crai
 patient1,XX,1,tumor_sample,test2_md.cram,test2_md.cram.crai
 patient1,XX,1,relapse_sample,test3_md.cram,test3_md.cram.crai
@@ -220,10 +220,10 @@ The `Sarek`-generated `CSV` file is stored under `results/csv/markduplicates.csv
 
 ##### Full samplesheet
 
-In this example, all possible columns are used including the `gender` and `status` information per patient:
+In this example, all possible columns are used, including the `sex` and `status` information per patient:
 
 ```console
-patient,gender,status,sample,cram,crai,table
+patient,sex,status,sample,cram,crai,table
 patient1,XX,0,test_sample,test_mapped.cram,test_mapped.cram.crai,test.table
 patient1,XX,1,tumor_sample,test2_mapped.cram,test2_mapped.cram.crai,test2.table
 patient1,XX,1,relapse_sample,test3_mapped.cram,test3_mapped.cram.crai,test3.table
@@ -249,10 +249,10 @@ The `Sarek`-generated `CSV` file is stored under `results/csv/recalibrated.csv`
 
 ##### Full samplesheet
 
-In this example, all possible columns are used including the `gender` and `status` information per patient:
+In this example, all possible columns are used, including the `sex` and `status` information per patient:
 
 ```console
-patient,gender,status,sample,cram,crai
+patient,sex,status,sample,cram,crai
 patient1,XX,0,normal_sample,test_mapped.cram,test_mapped.cram.crai
 patient1,XX,1,tumor_sample,test2_mapped.cram,test2_mapped.cram.crai
 patient1,XX,1,relapse_sample,test3_mapped.cram,test3_mapped.cram.crai

@@ -49,7 +49,7 @@ workflow GERMLINE_VARIANT_CALLING {
             //If no interval file provided (0) then add empty list
             intervals_new = num_intervals == 0 ? [] : intervals
 
-            [[patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:meta.sample, data_type:meta.data_type, num_intervals:num_intervals],
+            [[patient:meta.patient, sample:meta.sample, sex:meta.sex, status:meta.status, id:meta.sample, data_type:meta.data_type, num_intervals:num_intervals],
             cram, crai, intervals_new]
         }
 
@@ -61,7 +61,7 @@ workflow GERMLINE_VARIANT_CALLING {
             bed_new = num_intervals == 0 ? [] : bed_tbi[0]
             tbi_new = num_intervals == 0 ? [] : bed_tbi[1]
 
-            [[patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:meta.sample, data_type:meta.data_type, num_intervals:num_intervals],
+            [[patient:meta.patient, sample:meta.sample, sex:meta.sex, status:meta.status, id:meta.sample, data_type:meta.data_type, num_intervals:num_intervals],
             cram, crai, bed_new, tbi_new]
         }
 

@@ -11,10 +11,10 @@ workflow MAPPING_CSV {
         bam_indexed.collectFile(keepHeader: true, skip: 1, sort: true, storeDir: "${params.outdir}/csv") { meta, bam, bai ->
             patient = meta.patient
             sample  = meta.sample
-            gender  = meta.gender
+            sex     = meta.sex
             status  = meta.status
             bam   = "${params.outdir}/preprocessing/${sample}/mapped/${bam.name}"
             bai   = "${params.outdir}/preprocessing/${sample}/mapped/${bai.name}"
-            ["mapped.csv", "patient,gender,status,sample,bam,bai\n${patient},${gender},${status},${sample},${bam},${bai}\n"]
+            ["mapped.csv", "patient,sex,status,sample,bam,bai\n${patient},${sex},${status},${sample},${bam},${bai}\n"]
         }
 }
@@ -11,12 +11,12 @@ workflow MARKDUPLICATES_CSV {
         cram_markduplicates.collectFile(keepHeader: true, skip: 1, sort: true, storeDir: "${params.outdir}/csv") { meta, file, index ->
             patient        = meta.patient
             sample         = meta.sample
-            gender         = meta.gender
+            sex            = meta.sex
             status         = meta.status
             suffix_aligned = params.save_output_as_bam ? "bam" : "cram"
             suffix_index   = params.save_output_as_bam ? "bam.bai" : "cram.crai"
             file   = "${params.outdir}/preprocessing/${sample}/markduplicates/${file.baseName}.${suffix_aligned}"
             index   = "${params.outdir}/preprocessing/${sample}/markduplicates/${index.baseName.minus(".cram")}.${suffix_index}"
-            ["markduplicates_no_table.csv", "patient,gender,status,sample,cram,crai\n${patient},${gender},${status},${sample},${file},${index}\n"]
+            ["markduplicates_no_table.csv", "patient,sex,status,sample,cram,crai\n${patient},${sex},${status},${sample},${file},${index}\n"]
         }
 }
@@ -58,7 +58,7 @@ workflow PAIR_VARIANT_CALLING {
             //If no interval file provided (0) then add empty list
             intervals_new = num_intervals == 0 ? [] : intervals
 
-            [[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id: meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:num_intervals],
+            [[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, sex:meta.sex, id: meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:num_intervals],
             normal_cram, normal_crai, tumor_cram, tumor_crai, intervals_new]
         }
 
@@ -70,7 +70,7 @@ workflow PAIR_VARIANT_CALLING {
             bed_new = num_intervals == 0 ? [] : bed_tbi[0]
             tbi_new = num_intervals == 0 ? [] : bed_tbi[1]
 
-            [[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id: meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:num_intervals],
+            [[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, sex:meta.sex, id: meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:num_intervals],
             normal_cram, normal_crai, tumor_cram, tumor_crai, bed_new, tbi_new]
 
         }
@@ -168,7 +168,7 @@ workflow PAIR_VARIANT_CALLING {
                                             bed_new = num_intervals == 0 ? [] : bed_tbi[0]
                                             tbi_new = num_intervals == 0 ? [] : bed_tbi[1]
 
-                                            [[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:num_intervals],
+                                            [[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, sex:meta.sex, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:num_intervals],
                                             normal_cram, normal_crai, tumor_cram, tumor_crai, vcf, vcf_tbi, bed_new, tbi_new]
                                         }
 

@@ -11,13 +11,13 @@ workflow PREPARE_RECALIBRATION_CSV {
         cram_table_bqsr.collectFile(keepHeader: true, skip: 1, sort: true, storeDir: "${params.outdir}/csv") { meta, cram, crai, table ->
             patient = meta.patient
             sample  = meta.sample
-            gender  = meta.gender
+            sex     = meta.sex
             status  = meta.status
             suffix_aligned = params.save_output_as_bam ? "bam" : "cram"
             suffix_index   = params.save_output_as_bam ? "bam.bai" : "cram.crai"
             cram = "${params.outdir}/preprocessing/${sample}/markduplicates/${cram.baseName}.${suffix_aligned}"
             crai = "${params.outdir}/preprocessing/${sample}/markduplicates/${crai.baseName.minus(".cram")}.${suffix_index}"
             table = "${params.outdir}/preprocessing/${sample}/recal_table/${sample}.recal.table"
-            ["markduplicates.csv", "patient,gender,status,sample,cram,crai,table\n${patient},${gender},${status},${sample},${cram},${crai},${table}\n"]
+            ["markduplicates.csv", "patient,sex,status,sample,cram,crai,table\n${patient},${sex},${status},${sample},${cram},${crai},${table}\n"]
         }
 }
@@ -11,10 +11,10 @@ workflow RECALIBRATE_CSV {
         cram_recalibrated_index.collectFile(keepHeader: true, skip: 1, sort: true, storeDir: "${params.outdir}/csv") { meta, file, index ->
             patient = meta.patient
             sample  = meta.sample
-            gender  = meta.gender
+            sex     = meta.sex
             status  = meta.status
             file = "${params.outdir}/preprocessing/${sample}/recalibrated/${file.name}"
             index = "${params.outdir}/preprocessing/${sample}/recalibrated/${index.name}"
-            ["recalibrated.csv", "patient,gender,status,sample,cram,crai\n${patient},${gender},${status},${sample},${file},${index}\n"]
+            ["recalibrated.csv", "patient,sex,status,sample,cram,crai\n${patient},${sex},${status},${sample},${file},${index}\n"]
         }
 }
@@ -50,7 +50,7 @@ workflow TUMOR_ONLY_VARIANT_CALLING {
             //If no interval file provided (0) then add empty list
             intervals_new = num_intervals == 0 ? [] : intervals
 
-            [[patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:meta.sample, data_type:meta.data_type, num_intervals:num_intervals],
+            [[patient:meta.patient, sample:meta.sample, sex:meta.sex, status:meta.status, id:meta.sample, data_type:meta.data_type, num_intervals:num_intervals],
             cram, crai, intervals_new]
         }
 
@@ -62,7 +62,7 @@ workflow TUMOR_ONLY_VARIANT_CALLING {
             bed_new = num_intervals == 0 ? [] : bed_tbi[0]
             tbi_new = num_intervals == 0 ? [] : bed_tbi[1]
 
-            [[patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:meta.sample, data_type:meta.data_type, num_intervals:num_intervals],
+            [[patient:meta.patient, sample:meta.sample, sex:meta.sex, status:meta.status, id:meta.sample, data_type:meta.data_type, num_intervals:num_intervals],
             cram, crai, bed_new, tbi_new]
         }
 

@@ -13,6 +13,6 @@ workflow VARIANTCALLING_CSV {
             sample        = meta.id
             variantcaller = meta.variantcaller
             vcf = "${params.outdir}/variant_calling/${meta.id}/${variantcaller}/${vcf.getName()}"
-            ["variantcalled.csv", "patient,gender,sample,variantcaller,vcf\n${patient},${sample},${variantcaller},${vcf}\n"]
+            ["variantcalled.csv", "patient,sample,variantcaller,vcf\n${patient},${sample},${variantcaller},${vcf}\n"]
         }
 }