Skip to content

Commit 2102bb0

Browse files
committed
WIP
1 parent 248545e commit 2102bb0

10 files changed

Lines changed: 255 additions & 1 deletion

File tree

.vscode/settings.json

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
11
{
2-
"markdown.styles": ["public/vscode_markdown.css"]
2+
"markdown.styles": [
3+
"public/vscode_markdown.css"
4+
],
5+
"nextflow.telemetry.enabled": true
36
}

docs/development/manual_tests.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1133,3 +1133,23 @@ nextflow run main.nf -profile test,docker --outdir ./results -w work/ -resume --
11331133
## Expect: BAM input shows up in FastQC -> mapping results.
11341134
nextflow run main.nf -profile test,docker --outdir ./results -w work/ --convert_inputbam --skip_deduplication -resume -ansi-log false -dump-channels
11351135
```
1136+
1137+
### MTDNA HAPLOGROUP CLASSIFICATION
1138+
1139+
```bash
1140+
#### MTDNA HAPLOGROUP CLASSIFICATION with default settings
1141+
## Expect: Directory created 'mtdna_haplogroup/<reference>/<sample_id>' containing a .txt file for each sample with haplogroup assignments
1142+
## Expect: The haplogroup .txt file contains at minimum columns for rank, name, quality, range, and details of the haplogroup assignment
1143+
nextflow run main.nf -profile docker,test --outdir ./results/mtdna_haplogroup_test --run_genotyping --genotyping_tool 'freebayes' --genotyping_source 'raw' --run_mtdna_haplogroup -resume
1144+
1145+
#### MTDNA HAPLOGROUP CLASSIFICATION with specific arguments
1146+
## Expect: Directory created 'mtdna_haplogroup/<reference>/<sample_id>' containing a .txt file for each sample with haplogroup assignments
1147+
## Expect: The haplogroup assignment may differ based on the classification settings
1148+
nextflow run main.nf -profile docker,test --outdir ./results/mtdna_haplogroup_test_args --run_genotyping --genotyping_tool 'freebayes' --genotyping_source 'raw' --run_mtdna_haplogroup --haplogrep_args '--extend-report' -resume
1149+
1150+
#### MTDNA HAPLOGROUP CLASSIFICATION with custom VCF input
1151+
## Use as input a version of the TSV that contains mitochondrial VCF files
1152+
## Expect: Directory created 'mtdna_haplogroup/<reference>/<sample_id>' containing a .txt file for each sample with haplogroup assignments
1153+
## Expect: The haplogroup assignment should reflect the variants in the input VCF files
1154+
nextflow run main.nf -profile docker,test --input ~/eager_dsl2_testing/input/mtdna/mtdna_vcf_samples.tsv --outdir ./results/mtdna_haplogroup_vcf_test --run_mtdna_haplogroup -resume
1155+
```

modules.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,11 @@
180180
"git_sha": "3a5fef109d113b4997c9822198664ca5f2716208",
181181
"installed_by": ["modules"]
182182
},
183+
"haplogrep3/classify": {
184+
"branch": "master",
185+
"git_sha": "81880787133db07d9b4c1febd152c090eb8325dc",
186+
"installed_by": ["modules"]
187+
},
183188
"kraken2/kraken2": {
184189
"branch": "master",
185190
"git_sha": "653218e79ffa76fde20319e9062f8b8da5cf7555",

nextflow.config

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,9 @@ params {
249249
run_sexdeterrmine = false
250250
sexdeterrmine_bedfile = null
251251

252+
// mtDNA haplogroup classification
253+
run_mtdna_haplogroup = false
254+
252255
// Genotyping
253256
run_genotyping = false
254257
genotyping_tool = null

nextflow_schema.json

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1590,6 +1590,21 @@
15901590
},
15911591
"fa_icon": "fas fa-transgender-alt",
15921592
"help_text": ""
1593+
},
1594+
"mtdna_haplogroup_options": {
1595+
"title": "mtDNA Haplogroup Classification",
1596+
"type": "object",
1597+
"description": "Options for classifying mitochondrial haplogroups using Haplogrep3.",
1598+
"default": "",
1599+
"fa_icon": "fas fa-dna",
1600+
"properties": {
1601+
"run_mtdna_haplogroup": {
1602+
"type": "boolean",
1603+
"description": "Run Haplogrep3 to determine mitochondrial haplogroups from VCF files.",
1604+
"fa_icon": "fas fa-dna",
1605+
"default": false
1606+
}
1607+
}
15931608
}
15941609
},
15951610
"allOf": [
@@ -1646,6 +1661,9 @@
16461661
},
16471662
{
16481663
"$ref": "#/$defs/human_sex_determination"
1664+
},
1665+
{
1666+
"$ref": "#/$defs/mtdna_haplogroup_options"
16491667
}
16501668
]
16511669
}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
//
2+
// Run classify mtdna haplogroup
3+
//
4+
5+
include { addNewMetaFromAttributes } from '../../subworkflows/local/utils_nfcore_eager_pipeline/main'
6+
7+
include { HAPLOGREP3_CLASSIFY as HAPLOGREP3_CLASSIFY_MTDNA } from '../../modules/nf-core/haplogrep3/classify/main'
8+
9+
workflow CLASSIFY_MTDNA_HAPLOGROUP {
10+
11+
take:
12+
mtdna_haplogroup_bam // channel: [ val(meta), [ vcf ] ] (despite the name, the caller in workflows/eager.nf supplies VCFs, not BAMs)
13+
14+
main:
15+
ch_versions = Channel.empty()
16+
ch_multiqc_files = Channel.empty()
17+
ch_haplogroups = Channel.empty()
18+
19+
if ( params.run_mtdna_haplogroup ) {
20+
// Prepare input for haplogrep3
21+
// The module requires a tuple with [meta, inputfile]
22+
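// NOTE: the caller (workflows/eager.nf) emits [meta, vcf], not [meta, bam, bai]; the destructuring below must match that shape once the new meta is prepended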
23+
24+
ch_input_haplogrep3 = mtdna_haplogroup_bam
25+
.map {
26+
// Prepend a new meta that contains the meta.reference value as the new_meta.reference attribute
27+
addNewMetaFromAttributes( it, "reference" , "reference" , false )
28+
}
29+
.map { meta, bam, bai ->
30+
[meta, bam]
31+
}
32+
33+
// Run mtDNA haplogroup classification with haplogrep3
34+
HAPLOGREP3_CLASSIFY_MTDNA(ch_input_haplogrep3)
35+
ch_haplogroups = HAPLOGREP3_CLASSIFY_MTDNA.out.txt
36+
ch_versions = ch_versions.mix(HAPLOGREP3_CLASSIFY_MTDNA.out.versions)
37+
}
38+
39+
emit:
40+
haplogroups = ch_haplogroups // channel: [ val(meta), path("*.txt") ]
41+
versions = ch_versions // channel: path(versions.yml)
42+
}
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
// TODO nf-core: If in doubt look at other nf-core/subworkflows to see how we are doing things! :)
2+
// https://github.com/nf-core/modules/tree/master/subworkflows
3+
// You can also ask for help via your pull request or on the #subworkflows channel on the nf-core Slack workspace:
4+
// https://nf-co.re/join
5+
// TODO nf-core: A subworkflow SHOULD import at least two modules
6+
7+
include { addNewMetaFromAttributes } from '../../subworkflows/local/utils_nfcore_eager_pipeline/main'
8+
include { HAPLOGREP3_CLASSIFY } from '../../modules/nf-core/haplogrep3/classify/main'
9+
10+
workflow HAPLOTYPE_HUMAN_MTDNA {
11+
12+
take:
13+
ch_mtdna_vcf // channel: [ val(meta), [ vcf ] ]
14+
15+
main:
16+
ch_versions = Channel.empty()
17+
ch_haplogroups = Channel.empty()
18+
19+
// Prepare input for haplogrep3
20+
// The module requires a tuple with [meta, inputfile]
21+
ch_input_haplogrep3 = ch_mtdna_vcf
22+
.map {
23+
// Prepend a new meta that contains the meta.reference value as the new_meta.reference attribute
24+
addNewMetaFromAttributes( it, "reference", "reference", false )
25+
}
26+
27+
// Run mtDNA haplogroup classification with haplogrep3
28+
HAPLOGREP3_CLASSIFY(ch_input_haplogrep3)
29+
ch_haplogroups = HAPLOGREP3_CLASSIFY.out.txt
30+
ch_versions = ch_versions.mix(HAPLOGREP3_CLASSIFY.out.versions)
31+
32+
emit:
33+
haplogroups = ch_haplogroups // channel: [ val(meta), path("*.txt") ]
34+
versions = ch_versions // channel: path(versions.yml)
35+
}
36+
37+
38+
workflow HAPLOGREP3_CLASSIFY {
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
2+
name: "haplotype_human_mtdna"
3+
## TODO nf-core: Add a description of the subworkflow and list keywords
4+
description: Classify human mitochondrial DNA haplogroups from VCF files using Haplogrep3
5+
keywords:
6+
- sort
7+
- bam
8+
- sam
9+
- cram
10+
## TODO nf-core: Add a list of the modules and/or subworkflows used in the subworkflow
11+
components:
12+
- haplogrep3/classify
13+
- samtools/index
14+
## TODO nf-core: List all of the channels used as input with a description and their structure
15+
input:
16+
- ch_bam:
17+
type: file
18+
description: |
19+
The input channel containing the mitochondrial VCF files
20+
Structure: [ val(meta), path(vcf) ]
21+
pattern: "*.{vcf,vcf.gz}"
22+
## TODO nf-core: List all of the channels used as output with a descriptions and their structure
23+
output:
24+
- bam:
25+
type: file
26+
description: |
27+
Channel containing BAM files
28+
Structure: [ val(meta), path(bam) ]
29+
pattern: "*.bam"
30+
- bai:
31+
type: file
32+
description: |
33+
Channel containing indexed BAM (BAI) files
34+
Structure: [ val(meta), path(bai) ]
35+
pattern: "*.bai"
36+
- csi:
37+
type: file
38+
description: |
39+
Channel containing CSI files
40+
Structure: [ val(meta), path(csi) ]
41+
pattern: "*.csi"
42+
- versions:
43+
type: file
44+
description: |
45+
File containing software versions
46+
Structure: [ path(versions.yml) ]
47+
pattern: "versions.yml"
48+
authors:
49+
- "@trianglegrrl"
50+
maintainers:
51+
- "@trianglegrrl"
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// TODO nf-core: Once you have added the required tests, please run the following command to build this file:
2+
// nf-core subworkflows test haplotype_human_mtdna
3+
nextflow_workflow {
4+
5+
name "Test Subworkflow HAPLOTYPE_HUMAN_MTDNA"
6+
script "../main.nf"
7+
workflow "HAPLOTYPE_HUMAN_MTDNA"
8+
9+
tag "subworkflows"
10+
tag "subworkflows_nfcore"
11+
tag "subworkflows/haplotype_human_mtdna"
12+
tag "haplogrep3"
13+
tag "haplogrep3/classify"
14+
15+
test("homo_sapiens - vcf - mitochondrial") {
16+
17+
when {
18+
workflow {
19+
"""
20+
input[0] = [
21+
[ id:'test', single_end:false, reference:'hg19' ], // meta map
22+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_chrM.vcf.gz', checkIfExists: true)
23+
]
24+
"""
25+
}
26+
}
27+
28+
then {
29+
assertAll(
30+
{ assert workflow.success },
31+
{ assert snapshot(workflow.out.haplogroups).match() },
32+
{ assert snapshot(workflow.out.versions).match() }
33+
)
34+
}
35+
}
36+
}

workflows/eager.nf

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ include { METAGENOMICS } from '../subwork
3232
include { ESTIMATE_CONTAMINATION } from '../subworkflows/local/estimate_contamination'
3333
include { CALCULATE_DAMAGE } from '../subworkflows/local/calculate_damage'
3434
include { RUN_SEXDETERRMINE } from '../subworkflows/local/run_sex_determination'
35+
include { CLASSIFY_MTDNA_HAPLOGROUP } from '../subworkflows/local/classify_mtdna_haplogroup'
3536
include { MERGE_LIBRARIES } from '../subworkflows/local/merge_libraries'
3637
include { MERGE_LIBRARIES as MERGE_LIBRARIES_GENOTYPING } from '../subworkflows/local/merge_libraries'
3738
include { GENOTYPE } from '../subworkflows/local/genotype'
@@ -560,6 +561,43 @@ workflow EAGER {
560561
ch_multiqc_files = ch_multiqc_files.mix(GENOTYPE.out.mqc.collect { it[1] }.ifEmpty([]))
561562
}
562563

564+
//
565+
// SUBWORKFLOW: Run mtDNA Haplogroup Classification
566+
//
567+
568+
if (params.run_mtdna_haplogroup) {
569+
// Ensure genotyping has run, as we need its VCF output
570+
if (!params.run_genotyping) {
571+
error "Cannot run mtDNA haplogroup classification (--run_mtdna_haplogroup) without running genotyping (--run_genotyping). VCF files are required as input."
572+
}
573+
574+
// Filter the VCFs from genotyping to only include those matching the mitochondrial header
575+
ch_mito_header_for_filter = REFERENCE_INDEXING.out.mitochondrion_header
576+
.map { meta, header -> [ meta.id, header ] }
577+
578+
ch_mtdna_haplogroup_input = GENOTYPE.out.vcf
579+
.map { meta, vcf, tbi ->
580+
// Need meta.reference to filter based on mito header
581+
def reference_id = meta.reference
582+
[ reference_id, meta, vcf ]
583+
}
584+
.join(ch_mito_header_for_filter) // Join by reference_id
585+
.filter { ref_id, meta, vcf, mito_header ->
586+
// Keep only VCFs where the reference matches the expected mitochondrial header
587+
// This assumes mito_header contains the specific contig name for mtDNA
588+
// We might need a more robust check depending on mito_header content
589+
vcf.name.contains(meta.id) // Basic check if VCF name includes sample ID - adjust filter logic as needed
590+
// TODO: Refine filter logic based on actual mito_header content and VCF naming conventions
591+
}
592+
.map { ref_id, meta, vcf, mito_header ->
593+
// Reformat to the expected [meta, vcf] structure for the subworkflow
594+
[ meta, vcf ]
595+
}
596+
597+
CLASSIFY_MTDNA_HAPLOGROUP(ch_mtdna_haplogroup_input)
598+
ch_versions = ch_versions.mix(CLASSIFY_MTDNA_HAPLOGROUP.out.versions)
599+
}
600+
563601
//
564602
// Collate and save software versions
565603
//

0 commit comments

Comments
 (0)