Skip to content

Commit fca6a65

Browse files
pinin4fjords and claude committed
feat(rustar): wire rustar-aligner as opt-in STAR drop-in [skip ci]
Add local RUSTAR_ALIGN and RUSTAR_GENOMEGENERATE modules using ghcr.io/scverse/rustar-aligner. Both reuse STAR's CLI and on-disk index format, so the dispatch in align_star and prepare_genome_indices just gets one more conditional.

The new --use_rustar_star toggle mirrors the existing --use_sentieon_star / --use_parabricks_star pattern.

Tests, the cross-aligner comparison harness, and on-VM verification land in follow-up commits on this branch.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 3267b3e commit fca6a65

16 files changed

Lines changed: 315 additions & 55 deletions

File tree

conf/modules/align_star.config

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ process {
33
// STAR alignment configuration
44
// Conditionals for when these processes run are handled in the workflow
55

6-
withName: '.*ALIGN_STAR:STAR_ALIGN|.*ALIGN_STAR:SENTIEON_STAR_ALIGN|.*ALIGN_STAR:PARABRICKS_RNA_FQ2BAM' {
6+
withName: '.*ALIGN_STAR:STAR_ALIGN|.*ALIGN_STAR:SENTIEON_STAR_ALIGN|.*ALIGN_STAR:PARABRICKS_RNA_FQ2BAM|.*ALIGN_STAR:RUSTAR_ALIGN' {
77
ext.args = {
88
def isPbrun = task.process.contains('PARABRICKS')
99
def isSentieon = task.process.endsWith(':SENTIEON_STAR_ALIGN')

main.nf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ workflow NFCORE_RNASEQ {
111111
params.skip_pseudo_alignment,
112112
params.use_sentieon_star,
113113
params.use_parabricks_star,
114+
params.use_rustar_star,
114115
isStarIndexLegacy() ?: false
115116
)
116117

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
// Aligns reads with `rustar-aligner`, invoked with STAR-style flags (--genomeDir,
// --readFilesIn, --runThreadN, --outFileNamePrefix, ...), and emits the log files plus
// whichever optional BAM/SAM/table/signal files match the globs in the output: block.
//
// Inputs:
//   reads               - read files for one sample
//   index               - directory passed to --genomeDir
//   gtf                 - annotation passed to --sjdbGTFfile (unless ignored, see below)
//   star_ignore_sjdbgtf - when truthy, --sjdbGTFfile is left off the command line
process RUSTAR_ALIGN {
2+
tag "$meta.id"
3+
label 'process_high'
4+
5+
// NOTE(review): `:dev` looks like a moving tag rather than a pinned release — confirm
// whether this should be pinned to a fixed version/digest for reproducibility.
container "ghcr.io/scverse/rustar-aligner:dev"
6+
7+
input:
8+
// Read files are staged under input*/ sub-directories (stageAs pattern).
tuple val(meta), path(reads, stageAs: "input*/*")
9+
tuple val(meta2), path(index)
10+
tuple val(meta3), path(gtf)
11+
val star_ignore_sjdbgtf
12+
13+
output:
14+
tuple val(meta), path('*Log.final.out') , emit: log_final
15+
tuple val(meta), path('*Log.out') , emit: log_out
16+
tuple val(meta), path('*Log.progress.out'), emit: log_progress
17+
// Version tuple: the evaluated command prints the first line of `rustar-aligner --version`
// with a leading "rustar-aligner " removed by sed; emitted on the `versions` topic.
tuple val("${task.process}"), val('rustar-aligner'), eval("rustar-aligner --version | sed -n '1{s/^rustar-aligner //;p}'"), emit: versions_rustar, topic: versions
18+
19+
// Everything below is optional: which files exist depends on the flags supplied via args.
tuple val(meta), path('*d.out.bam') , optional:true, emit: bam
20+
// NOTE(review): this pattern has no "Aligned." segment; only the stub below is seen
// creating a file with this exact name — confirm the real run ever produces it.
tuple val(meta), path("${prefix}.sortedByCoord.out.bam") , optional:true, emit: bam_sorted
21+
tuple val(meta), path("${prefix}.Aligned.sortedByCoord.out.bam") , optional:true, emit: bam_sorted_aligned
22+
tuple val(meta), path('*toTranscriptome.out.bam') , optional:true, emit: bam_transcript
23+
tuple val(meta), path('*Aligned.unsort.out.bam') , optional:true, emit: bam_unsorted
24+
tuple val(meta), path('*fastq.gz') , optional:true, emit: fastq
25+
// '*.tab' also matches the more specific *.SJ.out.tab / *.ReadsPerGene.out.tab files below.
tuple val(meta), path('*.tab') , optional:true, emit: tab
26+
tuple val(meta), path('*.SJ.out.tab') , optional:true, emit: spl_junc_tab
27+
tuple val(meta), path('*.ReadsPerGene.out.tab') , optional:true, emit: read_per_gene_tab
28+
tuple val(meta), path('*.out.junction') , optional:true, emit: junction
29+
tuple val(meta), path('*.out.sam') , optional:true, emit: sam
30+
tuple val(meta), path('*.wig') , optional:true, emit: wig
31+
tuple val(meta), path('*.bg') , optional:true, emit: bedgraph
32+
33+
when:
34+
task.ext.when == null || task.ext.when
35+
36+
script:
37+
def args = task.ext.args ?: ''
38+
// `prefix` is assigned without `def`; the output: block above interpolates ${prefix}
// in two of its path patterns.
prefix = task.ext.prefix ?: "${meta.id}"
39+
def reads1 = []
40+
def reads2 = []
41+
// Single-end: every read file goes into reads1.
// Otherwise files alternate by position: even index -> reads1, odd index -> reads2.
meta.single_end ? [reads].flatten().each{ read -> reads1 << read} : reads.eachWithIndex{ v, ix -> ( ix & 1 ? reads2 : reads1) << v }
42+
// Despite its name, this holds the --sjdbGTFfile flag; it is empty when the GTF is ignored.
def ignore_gtf = star_ignore_sjdbgtf ? '' : "--sjdbGTFfile $gtf"
43+
// Supply a default read-group line (ID and SM set to prefix) unless args already has one.
attrRG = args.contains("--outSAMattrRGline") ? "" : "--outSAMattrRGline 'ID:$prefix' 'SM:$prefix'"
44+
// Default to unsorted BAM output unless args sets --outSAMtype itself.
def out_sam_type = (args.contains('--outSAMtype')) ? '' : '--outSAMtype BAM Unsorted'
45+
// Only when args contains this exact substring: rename the unsorted BAM so that it is
// matched by the bam_unsorted glob ('*Aligned.unsort.out.bam').
// NOTE(review): exact-substring match — different spacing between the tokens will not trigger it.
mv_unsorted_bam = (args.contains('--outSAMtype BAM Unsorted SortedByCoordinate')) ? "mv ${prefix}.Aligned.out.bam ${prefix}.Aligned.unsort.out.bam" : ''
46+
// Shell script: run the aligner, apply the optional rename, then rename any
// ${prefix}.Unmapped.out.mate1/mate2 file to ${prefix}.unmapped_{1,2}.fastq and gzip it
// (the result is picked up by the '*fastq.gz' output glob).
"""
47+
rustar-aligner \\
48+
--genomeDir $index \\
49+
--readFilesIn ${reads1.join(",")} ${reads2.join(",")} \\
50+
--runThreadN $task.cpus \\
51+
--outFileNamePrefix $prefix. \\
52+
$out_sam_type \\
53+
$ignore_gtf \\
54+
$attrRG \\
55+
$args
56+
57+
$mv_unsorted_bam
58+
59+
if [ -f ${prefix}.Unmapped.out.mate1 ]; then
60+
mv ${prefix}.Unmapped.out.mate1 ${prefix}.unmapped_1.fastq
61+
gzip ${prefix}.unmapped_1.fastq
62+
fi
63+
if [ -f ${prefix}.Unmapped.out.mate2 ]; then
64+
mv ${prefix}.Unmapped.out.mate2 ${prefix}.unmapped_2.fastq
65+
gzip ${prefix}.unmapped_2.fastq
66+
fi
67+
"""
68+
69+
stub:
70+
// Stub run: creates empty placeholder files (and two gzipped empty-line fastq files)
// named so that the output patterns above have something to match.
prefix = task.ext.prefix ?: "${meta.id}"
71+
"""
72+
echo "" | gzip > ${prefix}.unmapped_1.fastq.gz
73+
echo "" | gzip > ${prefix}.unmapped_2.fastq.gz
74+
touch ${prefix}Xd.out.bam
75+
touch ${prefix}.Log.final.out
76+
touch ${prefix}.Log.out
77+
touch ${prefix}.Log.progress.out
78+
touch ${prefix}.sortedByCoord.out.bam
79+
touch ${prefix}.toTranscriptome.out.bam
80+
touch ${prefix}.Aligned.unsort.out.bam
81+
touch ${prefix}.Aligned.sortedByCoord.out.bam
82+
touch ${prefix}.tab
83+
touch ${prefix}.SJ.out.tab
84+
touch ${prefix}.ReadsPerGene.out.tab
85+
touch ${prefix}.Chimeric.out.junction
86+
touch ${prefix}.out.sam
87+
touch ${prefix}.Signal.UniqueMultiple.str1.out.wig
88+
touch ${prefix}.Signal.UniqueMultiple.str1.out.bg
89+
"""
90+
}
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
// Builds a genome index in a `star/` directory by running
// `rustar-aligner --runMode genomeGenerate` on the given FASTA, optionally with a GTF.
//
// Inputs:
//   fasta - genome FASTA passed to --genomeFastaFiles
//   gtf   - annotation; when truthy it is passed as --sjdbGTFfile, otherwise the flag is omitted
// Outputs:
//   index           - the `star` directory
//   versions_rustar - tool version tuple emitted on the `versions` topic
process RUSTAR_GENOMEGENERATE {
2+
tag "$fasta"
3+
label 'process_high'
4+
5+
// NOTE(review): `:dev` looks like a moving tag rather than a pinned release — confirm
// whether this should be pinned to a fixed version/digest for reproducibility.
container "ghcr.io/scverse/rustar-aligner:dev"
6+
7+
input:
8+
tuple val(meta), path(fasta)
9+
tuple val(meta2), path(gtf)
10+
11+
output:
12+
tuple val(meta), path("star") , emit: index
13+
// Version tuple: the evaluated command prints the first line of `rustar-aligner --version`
// with a leading "rustar-aligner " removed by sed.
tuple val("${task.process}"), val('rustar-aligner'), eval("rustar-aligner --version | sed -n '1{s/^rustar-aligner //;p}'"), emit: versions_rustar, topic: versions
14+
15+
when:
16+
task.ext.when == null || task.ext.when
17+
18+
script:
19+
def args = task.ext.args ?: ''
20+
// Whitespace-split view of args, used below for an exact-token lookup.
def args_list = args.tokenize()
21+
// When task.memory is set, pass it (in bytes, minus 100,000,000) as --limitGenomeGenerateRAM.
def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : ''
22+
def include_gtf = gtf ? "--sjdbGTFfile $gtf" : ''
23+
// Heuristic mirrors STAR_GENOMEGENERATE's gawk/samtools-faidx pipeline, but
24+
// computed in Groovy so we don't need samtools+gawk in the rustar container.
25+
// Approximating genome length with the on-disk fasta size is within 1-2% of
26+
// the true base count and is well inside the floor() rounding of log2(len)/2-1.
27+
// NOTE(review): presumably `fasta.size()` returns the byte size of a single staged file —
// confirm; if `fasta` were ever a list, size() would be the element count instead.
// NOTE(review): a byte count includes header lines and newlines, so for a genome length
// just below a rounding boundary the floor() below could land one step higher than a
// base-count-based calculation — verify this is acceptable.
def auto_sa_index = ''
28+
// Only auto-compute when the caller has not supplied --genomeSAindexNbases as a token in args.
if (!args_list.contains('--genomeSAindexNbases')) {
29+
def genome_size = fasta.size()
30+
// floor(log2(genome_size) / 2 - 1)
def computed = Math.floor(Math.log(genome_size as double) / Math.log(2) / 2 - 1) as int
31+
// Clamp the result to the range [1, 14].
def num_bases = Math.min(14, Math.max(1, computed))
32+
auto_sa_index = "--genomeSAindexNbases ${num_bases}"
33+
}
34+
"""
35+
mkdir star
36+
rustar-aligner \\
37+
--runMode genomeGenerate \\
38+
--genomeDir star/ \\
39+
--genomeFastaFiles $fasta \\
40+
$include_gtf \\
41+
--runThreadN $task.cpus \\
42+
$auto_sa_index \\
43+
$memory \\
44+
$args
45+
"""
46+
47+
stub:
48+
// Stub run: creates an empty placeholder index. The gtf branch additionally touches the
// exon/gene/transcript/sjdb files; the other branch creates only the genome-level files.
if (gtf) {
49+
"""
50+
mkdir star
51+
touch star/Genome
52+
touch star/Log.out
53+
touch star/SA
54+
touch star/SAindex
55+
touch star/chrLength.txt
56+
touch star/chrName.txt
57+
touch star/chrNameLength.txt
58+
touch star/chrStart.txt
59+
touch star/exonGeTrInfo.tab
60+
touch star/exonInfo.tab
61+
touch star/geneInfo.tab
62+
touch star/genomeParameters.txt
63+
touch star/sjdbInfo.txt
64+
touch star/sjdbList.fromGTF.out.tab
65+
touch star/sjdbList.out.tab
66+
touch star/transcriptInfo.tab
67+
"""
68+
} else {
69+
"""
70+
mkdir star
71+
touch star/Genome
72+
touch star/Log.out
73+
touch star/SA
74+
touch star/SAindex
75+
touch star/chrLength.txt
76+
touch star/chrName.txt
77+
touch star/chrNameLength.txt
78+
touch star/chrStart.txt
79+
touch star/genomeParameters.txt
80+
"""
81+
}
82+
}

nextflow.config

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ params {
7171
aligner = 'star_salmon'
7272
use_sentieon_star = false
7373
use_parabricks_star = false
74+
use_rustar_star = false
7475
use_rustqc = false
7576
gpu_container_options = null
7677
pseudo_aligner = null

nextflow_schema.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -447,6 +447,11 @@
447447
"description": "Optionally accelerate STAR and MarkDuplicates with Parabricks",
448448
"fa_icon": "fas fa-running"
449449
},
450+
"use_rustar_star": {
451+
"type": "boolean",
452+
"description": "Use rustar-aligner (https://github.com/scverse/rustar-aligner), a Rust port of STAR, in place of the standard STAR processes. Experimental.",
453+
"fa_icon": "fas fa-running"
454+
},
450455
"gpu_container_options": {
451456
"type": "string",
452457
"description": "Override container GPU flags for GPU tasks. Auto-detects if not set (--gpus all for Docker, --nv for Singularity/Apptainer).",

subworkflows/local/align_star/main.nf

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
//
44
include { SENTIEON_STARALIGN as SENTIEON_STAR_ALIGN } from '../../../modules/nf-core/sentieon/staralign/main'
55
include { PARABRICKS_RNAFQ2BAM as PARABRICKS_RNA_FQ2BAM } from '../../../modules/nf-core/parabricks/rnafq2bam/main'
6+
include { RUSTAR_ALIGN } from '../../../modules/local/rustar_align/align/main'
67
include { STAR_ALIGN } from '../../../modules/nf-core/star/align'
78
include { BAM_SORT_STATS_SAMTOOLS } from '../../nf-core/bam_sort_stats_samtools'
89

@@ -32,6 +33,7 @@ workflow ALIGN_STAR {
3233
fasta_fai // channel: [ val(meta), path(fasta), path(fai) ]
3334
use_sentieon_star // boolean: whether star alignment is accelerated with Sentieon
3435
use_parabricks_star // boolean: whether star alignment (and mark duplicates) is accelerated with Parabricks
36+
use_rustar_star // boolean: whether to use rustar-aligner in place of STAR
3537
skip_markduplicates // boolean: whether to skip marking duplicates
3638

3739
main:
@@ -51,6 +53,11 @@ workflow ALIGN_STAR {
5153
PARABRICKS_RNA_FQ2BAM(reads, fasta_fai.map { meta, fasta, _fai -> [ meta, fasta ] }, index, true, !skip_markduplicates)
5254
ch_star_out = PARABRICKS_RNA_FQ2BAM
5355

56+
} else if (use_rustar_star) {
57+
58+
RUSTAR_ALIGN(reads, index, gtf, star_ignore_sjdbgtf)
59+
ch_star_out = RUSTAR_ALIGN
60+
5461
} else {
5562

5663
STAR_ALIGN(reads, index, gtf, star_ignore_sjdbgtf)

subworkflows/local/align_star/tests/main.extra_args.nf.test

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ nextflow_workflow {
3131
star_ignore_sjdbgtf = false
3232
use_sentieon_star = false
3333
use_parabricks_star = false
34+
use_rustar_star = false
3435
skip_markduplicates = false
3536

3637
input[0] = channel.of([
@@ -53,7 +54,8 @@ nextflow_workflow {
5354
])
5455
input[5] = use_sentieon_star
5556
input[6] = use_parabricks_star
56-
input[7] = skip_markduplicates
57+
input[7] = use_rustar_star
58+
input[8] = skip_markduplicates
5759
"""
5860
}
5961
}

subworkflows/local/align_star/tests/main.nf.test

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ nextflow_workflow {
3131
star_ignore_sjdbgtf = false
3232
use_sentieon_star = false
3333
use_parabricks_star = false
34+
use_rustar_star = false
3435
skip_markduplicates = false
3536

3637
input[0] = channel.of([
@@ -53,7 +54,8 @@ nextflow_workflow {
5354
])
5455
input[5] = use_sentieon_star
5556
input[6] = use_parabricks_star
56-
input[7] = skip_markduplicates
57+
input[7] = use_rustar_star
58+
input[8] = skip_markduplicates
5759
"""
5860
}
5961
}
@@ -124,6 +126,7 @@ nextflow_workflow {
124126
star_ignore_sjdbgtf = false
125127
use_sentieon_star = false
126128
use_parabricks_star = false
129+
use_rustar_star = false
127130
skip_markduplicates = false
128131

129132
input[0] = channel.of([
@@ -146,7 +149,8 @@ nextflow_workflow {
146149
])
147150
input[5] = use_sentieon_star
148151
input[6] = use_parabricks_star
149-
input[7] = skip_markduplicates
152+
input[7] = use_rustar_star
153+
input[8] = skip_markduplicates
150154
"""
151155
}
152156
}
@@ -205,6 +209,7 @@ nextflow_workflow {
205209
star_ignore_sjdbgtf = false
206210
use_sentieon_star = false
207211
use_parabricks_star = false
212+
use_rustar_star = false
208213
skip_markduplicates = false
209214

210215
input[0] = channel.of([
@@ -227,7 +232,8 @@ nextflow_workflow {
227232
])
228233
input[5] = use_sentieon_star
229234
input[6] = use_parabricks_star
230-
input[7] = skip_markduplicates
235+
input[7] = use_rustar_star
236+
input[8] = skip_markduplicates
231237
"""
232238
}
233239
}

subworkflows/local/align_star/tests/main.parabricks.nf.test

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ nextflow_workflow {
3131
star_ignore_sjdbgtf = false
3232
use_sentieon_star = false
3333
use_parabricks_star = true
34+
use_rustar_star = false
3435
skip_markduplicates = false
3536

3637
input[0] = channel.of([
@@ -53,7 +54,8 @@ nextflow_workflow {
5354
])
5455
input[5] = use_sentieon_star
5556
input[6] = use_parabricks_star
56-
input[7] = skip_markduplicates
57+
input[7] = use_rustar_star
58+
input[8] = skip_markduplicates
5759
"""
5860
}
5961
}
@@ -93,6 +95,7 @@ nextflow_workflow {
9395
star_ignore_sjdbgtf = false
9496
use_sentieon_star = false
9597
use_parabricks_star = true
98+
use_rustar_star = false
9699
skip_markduplicates = false
97100

98101
input[0] = channel.of([
@@ -115,7 +118,8 @@ nextflow_workflow {
115118
])
116119
input[5] = use_sentieon_star
117120
input[6] = use_parabricks_star
118-
input[7] = skip_markduplicates
121+
input[7] = use_rustar_star
122+
input[8] = skip_markduplicates
119123
"""
120124
}
121125
}

0 commit comments

Comments
 (0)