Skip to content

Commit 340f813

Browse files
Merge pull request #606 from SusiJo/ascat
Include new module ASCAT
2 parents 135ec96 + f09de9a commit 340f813

23 files changed

Lines changed: 732 additions & 299 deletions

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7070
- [#581](https://github.com/nf-core/sarek/pull/581) - `TIDDIT` is updated to `3.1.0`
7171
- [#593](https://github.com/nf-core/sarek/pull/593) - update `ensembl-vep` cache version and module
7272
- [#600](https://github.com/nf-core/sarek/pull/600) - Remove `TODO` in awsfulltest
73+
- [#606](https://github.com/nf-core/sarek/pull/606) - Updated `ASCAT` to version `3.0` as module
7374
- [#608](https://github.com/nf-core/sarek/pull/608) - Prevent candidate VCFs from getting published in manta
7475

7576
### Fixed

bin/convertAlleleCounts.r

Lines changed: 0 additions & 97 deletions
This file was deleted.

bin/run_ascat.r

Lines changed: 0 additions & 94 deletions
This file was deleted.

conf/igenomes.config

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,11 @@ params {
1212
// illumina iGenomes reference file paths
1313
genomes {
1414
'GATK.GRCh37' {
15-
ac_loci = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/ASCAT/1000G_phase3_20130502_SNP_maf0.3.loci"
16-
ac_loci_gc = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/ASCAT/1000G_phase3_20130502_SNP_maf0.3.loci.gc"
15+
ascat_alleles = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/ASCAT/G1000_alleles_hg19.zip"
16+
ascat_genome = 'hg19'
17+
ascat_loci = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/ASCAT/G1000_loci_hg19.zip"
18+
ascat_loci_gc = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/ASCAT/GC_G1000_hg19.zip"
19+
ascat_loci_rt = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/ASCAT/RT_G1000_hg19.zip"
1720
bwa = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/BWAIndex/"
1821
chr_dir = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/Chromosomes"
1922
dbsnp = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/dbsnp_138.b37.vcf"
@@ -34,8 +37,11 @@ params {
3437
vep_species = 'homo_sapiens'
3538
}
3639
'GATK.GRCh38' {
37-
ac_loci = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/ASCAT/1000G_phase3_GRCh38_maf0.3.loci"
38-
ac_loci_gc = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/ASCAT/1000G_phase3_GRCh38_maf0.3.loci.gc"
40+
ascat_alleles = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/ASCAT/G1000_alleles_hg38.zip"
41+
ascat_genome = 'hg38'
42+
ascat_loci = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/ASCAT/G1000_loci_hg38.zip"
43+
ascat_loci_gc = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/ASCAT/GC_G1000_hg38.zip"
44+
ascat_loci_rt = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/ASCAT/RT_G1000_hg38.zip"
3945
bwa = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/BWAIndex/"
4046
bwamem2 = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/BWAmem2Index/"
4147
dragmap = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/dragmap/"

conf/modules.config

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -923,25 +923,25 @@ process{
923923
// PAIR_VARIANT_CALLING
924924

925925
//ASCAT
926-
if (params.tools && params.tools.contains('ascat')) {
927-
withName: 'ASCAT' {
928-
ext.args = {
929-
[
930-
"gender": meta.sex,
931-
//"genomeVersion": "hg19"
932-
//"purity": (optional),
933-
//"ploidy": params.ploidy,
934-
//"gc_files": (optional),
935-
//"minCounts": (optional),
936-
//"chrom_names": (optional),
937-
//"min_base_qual": (optional),
938-
//"min_map_qual": (optional),
939-
//"ref_fasta": (optional),
940-
//"skip_allele_counting_tumour": (optional),
941-
//"skip_allele_counting_normal": (optional)
942-
]
943-
}
944-
}
926+
withName: 'ASCAT' {
927+
928+
ext.args = {[
929+
"gender": meta.gender,
930+
"genomeVersion": params.ascat_genome,
931+
"purity": params.ascat_purity,
932+
"ploidy": params.ploidy,
933+
"minCounts": params.ascat_min_counts,
934+
"chrom_names": meta.gender == 'XX' ? params.ascat_chromosomes : "c(1:22, 'X', 'Y')",
935+
"min_base_qual": params.ascat_min_base_qual,
936+
"min_map_qual": params.ascat_min_map_qual
937+
]}
938+
ext.when = { params.tools && params.tools.contains('ascat') }
939+
publishDir = [
940+
mode: params.publish_dir_mode,
941+
path: { "${params.outdir}/variant_calling/${meta.id}/ascat" },
942+
pattern: "*{png,cnvs.txt,metrics.txt,purityploidy.txt,segments.txt,LogR.txt,BAF.txt}"
943+
]
944+
945945
}
946946

947947
//CONTROLFREEC

conf/test.config

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,25 @@ profiles {
147147

148148
params.nucleotides_per_second = 20
149149
}
150+
// can only be tested locally because the CRAM files are too large for GitHub Actions (GHA)
151+
// download corresponding input files (ascat_somatic.csv) from ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase3/data/
152+
// the test only works with loci files that are not chromosome-annotated; these are available at https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS
153+
tools_somatic_ascat{
154+
params.input = "${baseDir}/tests/csv/3.0/ascat_somatic.csv"
155+
params.chr_dir = params.test_data['homo_sapiens']['genome']['genome_21_chromosomes_dir']
156+
params.ascat_loci = "/mnt/volume/repos/modules/test_ascat2/G1000_loci_hg19.zip"
157+
params.ascat_chromosomes = 'c("21", "22")'
158+
params.ascat_min_base_qual = 30
159+
params.germline_resource = params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_21_vcf_gz']
160+
params.intervals = params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed']
161+
params.step = 'variant_calling'
162+
params.joint_germline = true
163+
params.wes = false
164+
params.tools = 'ascat'
165+
params.igenomes_ignore = false
166+
params.genome = 'GATK.GRCh37'
167+
168+
}
150169
trimming {
151170
params.clip_r1 = 1
152171
params.clip_r2 = 1

docs/output.md

Lines changed: 28 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
4242
- [TIDDIT](#tiddit)
4343
- [Sentieon DNAscope SV](#sentieon-dnascope-sv)
4444
- [Sample heterogeneity, ploidy and CNVs](#sample-heterogeneity-ploidy-and-cnvs)
45-
- [ConvertAlleleCounts](#convertallelecounts)
4645
- [ASCAT](#ascat)
4746
- [Control-FREEC](#control-freec)
4847
- [MSI status](#msi-status)
@@ -432,51 +431,45 @@ For further reading and documentation see the [Sentieon DNAscope user guide](htt
432431

433432
### Sample heterogeneity, ploidy and CNVs
434433

435-
#### ConvertAlleleCounts
436-
437-
Running ASCAT on NGS data requires that the `BAM` files are converted into BAF and LogR values.
438-
This can be done using the software [AlleleCount](https://github.com/cancerit/alleleCount) followed by the provided [ConvertAlleleCounts](https://github.com/nf-core/sarek/blob/master/bin/convertAlleleCounts.r) R-script.
439-
440-
For a Tumor/Normal pair:
441-
442-
**Output directory: `results/VariantCalling/[TUMOR_vs_NORMAL]/ASCAT`**
443-
444-
- `[TUMORSAMPLE].BAF` and `[NORMALSAMPLE].BAF`
445-
- file with beta allele frequencies
446-
- `[TUMORSAMPLE].LogR` and `[NORMALSAMPLE].LogR`
447-
- file with total copy number on a logarithmic scale
448-
449434
#### ASCAT
450435

451436
[ASCAT](https://github.com/Crick-CancerGenomics/ascat) is a software for performing allele-specific copy number analysis of tumor samples and for estimating tumor ploidy and purity (normal contamination).
452437
It infers tumor purity and ploidy and calculates whole-genome allele-specific copy number profiles.
453438
`ASCAT` is written in `R` and available here: [github.com/Crick-CancerGenomics/ascat](https://github.com/Crick-CancerGenomics/ascat).
454439
The `ASCAT` process gives several images as output, described in detail in this [book chapter](http://www.ncbi.nlm.nih.gov/pubmed/22130873).
440+
Running ASCAT on NGS data requires that the `BAM` files are converted into BAF and LogR values.
441+
This is done internally using the software [AlleleCount](https://github.com/cancerit/alleleCount).
455442

456443
For a Tumor/Normal pair:
457444

458-
**Output directory: `results/VariantCalling/[TUMOR_vs_NORMAL]/ASCAT`**
445+
**Output directory: `results/variant_calling/[TUMOR_vs_NORMAL]/ascat`**
459446

460-
- `[TUMORSAMPLE].aberrationreliability.png`
461-
- Image with information about aberration reliability
462-
- `[TUMORSAMPLE].ASCATprofile.png`
463-
- Image with information about ASCAT profile
464-
- `[TUMORSAMPLE].ASPCF.png`
447+
- `[TUMORSAMPLE_VS_NORMALSAMPLE].tumour.ASPCF.png`
465448
- Image with information about ASPCF
466-
- `[TUMORSAMPLE].rawprofile.png`
467-
- Image with information about raw profile
468-
- `[TUMORSAMPLE].sunrise.png`
469-
- Image with information about sunrise
470-
- `[TUMORSAMPLE].tumour.png`
471-
- Image with information about tumor
472-
- `[TUMORSAMPLE].cnvs.txt`
473-
- file with information about CNVS
474-
- `[TUMORSAMPLE].LogR.PCFed.txt`
475-
- file with information about LogR
476-
- `[TUMORSAMPLE].purityploidy.txt`
477-
- file with information about purity ploidy
478-
479-
The text file `[TUMORSAMPLE].cnvs.txt` countains predictions about copy number state for all the segments.
449+
- `[TUMORSAMPLE_VS_NORMALSAMPLE].before_correction.[TUMORSAMPLE_VS_NORMALSAMPLE].tumour.png`
450+
- Image with information about raw profile of tumor sample of logR and BAF values
451+
- `[TUMORSAMPLE_VS_NORMALSAMPLE].before_correction.[TUMORSAMPLE_VS_NORMALSAMPLE].germline.png`
452+
- Image with information about raw profile of normal sample of logR and BAF values
453+
- `[TUMORSAMPLE_VS_NORMALSAMPLE].after_correction_gc_rt.[TUMORSAMPLE_VS_NORMALSAMPLE].tumour.png`
454+
- Image with information about GC and RT corrected logR and BAF values of tumor sample
455+
- `[TUMORSAMPLE_VS_NORMALSAMPLE].after_correction_gc_rt.[TUMORSAMPLE_VS_NORMALSAMPLE].germline.png`
456+
- Image with information about GC and RT corrected logR and BAF values of normal sample
457+
- `[TUMORSAMPLE_VS_NORMALSAMPLE].sunrise.png`
458+
- Image visualising the range of ploidy and tumor percentage values
459+
- `[TUMORSAMPLE_VS_NORMALSAMPLE].metrics.txt`
460+
- File with information about different metrics from ASCAT profiles
461+
- `[TUMORSAMPLE_VS_NORMALSAMPLE].cnvs.txt`
462+
- File with information about CNVs
463+
- `[TUMORSAMPLE_VS_NORMALSAMPLE].purityploidy.txt`
464+
- File with information about purity and ploidy
465+
- `[TUMORSAMPLE_VS_NORMALSAMPLE].segments.txt`
466+
- File with information about copy number segments
467+
- `[TUMORSAMPLE_VS_NORMALSAMPLE].tumour_tumourBAF.txt` and `[TUMORSAMPLE_VS_NORMALSAMPLE].tumour_normalBAF.txt`
468+
- Files with B-allele frequencies (BAF)
469+
- `[TUMORSAMPLE_VS_NORMALSAMPLE].tumour_tumourLogR.txt` and `[TUMORSAMPLE_VS_NORMALSAMPLE].tumour_normalLogR.txt`
470+
- File with total copy number on a logarithmic scale
471+
472+
The text file `[TUMORSAMPLE_VS_NORMALSAMPLE].cnvs.txt` contains predictions about copy number state for all the segments.
480473
The output is a tab delimited text file with the following columns:
481474

482475
- _chr_: chromosome number

0 commit comments

Comments
 (0)