Skip to content

Commit f3e0289

Browse files
fellen31 and Joon-Klaps authored
Add paraphrase (#10273)
* Add paraphrase * Apply suggestions from code review Co-authored-by: Joon Klaps <joon.klaps@kuleuven.be> * Add stub tests and output sanitation * Fix versions --------- Co-authored-by: Joon Klaps <joon.klaps@kuleuven.be>
1 parent e11b956 commit f3e0289

6 files changed

Lines changed: 538 additions & 0 deletions

File tree

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
---
2+
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
3+
channels:
4+
- conda-forge
5+
- bioconda
6+
dependencies:
7+
- conda-forge::pip==26.0.1
8+
- conda-forge::python=3.14.3
9+
- pip:
10+
- paraphrase==0.2.0

modules/nf-core/paraphrase/main.nf

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
process PARAPHRASE {
2+
tag "$meta.id"
3+
label 'process_single'
4+
5+
conda "${moduleDir}/environment.yml"
6+
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
7+
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/bc/bc177b8e7e1d9bbdcbb64a6ad630c7ecc63a2229ce2b219408888bc2bb34cac3/data':
8+
'community.wave.seqera.io/library/pip_paraphrase:59a4576966ee5f0b' }"
9+
10+
input:
11+
tuple val(meta), path(jsons), val(samples)
12+
tuple val(meta2), path(yaml)
13+
val(tsv_output)
14+
15+
output:
16+
tuple val(meta), path("*.json"), emit: json, optional: true
17+
tuple val(meta), path("*.tsv"), emit: tsv, optional: true
18+
19+
tuple val("${task.process}"), val('paraphrase'), eval("paraphrase --version | sed 's/.* //'"), topic: versions, emit: versions_paraphrase
20+
21+
when:
22+
task.ext.when == null || task.ext.when
23+
24+
script: // builds the paraphrase command line; jsons/samples are list-wrapped so one value or many join identically
25+
def args = task.ext.args ?: '' // extra CLI arguments supplied through task.ext.args
26+
def prefix = task.ext.prefix ?: "${meta.id}" // output file stem, defaults to the sample id
27+
def rules = yaml ? "--rules $yaml" : '' // only pass --rules when a YAML file was supplied
28+
def output_format = tsv_output ? 'tsv' : 'json' // used for --output-format and as the file extension
29+
"""
30+
paraphrase \
31+
$args \
32+
--input ${[jsons].flatten().join(' --input ')} \
33+
--sample ${[samples].flatten().join(' --sample ')} \
34+
--output-format=${output_format} \
35+
$rules \
36+
> ${prefix}.${output_format}
37+
"""
38+
39+
stub:
40+
def args = task.ext.args ?: ''
41+
def prefix = task.ext.prefix ?: "${meta.id}"
42+
def output_format = tsv_output ? 'tsv' : 'json'
43+
"""
44+
echo $args
45+
46+
touch ${prefix}.${output_format}
47+
"""
48+
}
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
2+
name: "paraphrase"
3+
description: Parse and annotate Paraphase JSONs
4+
keywords:
5+
- long-read
6+
- paraphrase
7+
- annotate
8+
tools:
9+
- "paraphrase":
10+
description: "Paraphase JSON parser"
11+
homepage: "https://github.com/Clinical-Genomics/paraphrase"
12+
documentation: "https://github.com/Clinical-Genomics/paraphrase#readme"
13+
tool_dev_url: "https://github.com/Clinical-Genomics/paraphrase"
14+
licence: ["MIT"]
15+
16+
input:
17+
- - meta:
18+
type: map
19+
description: Groovy Map containing sample information. e.g. `[ id:'sample1']`
20+
- jsons:
21+
type: file
22+
description: "One or more JSON files from paraphase"
23+
pattern: "*.json"
24+
ontologies:
25+
- edam: http://edamontology.org/format_3464 # JSON
26+
- samples:
27+
type: list
28+
description: "Sample names corresponding to the JSON files. Must be in the same order as the JSON files."
29+
- - meta2:
30+
type: map
31+
description: Groovy Map containing information about the rules file. e.g. `[ id:'rules' ]`
32+
- yaml:
33+
type: file
34+
description: "YAML file containing rules for annotation"
35+
pattern: "*.yaml"
36+
ontologies:
37+
- edam: http://edamontology.org/format_3473 # YAML
38+
- tsv_output:
39+
type: boolean
40+
description: "If true, write the output in TSV format; otherwise JSON is written. This input is required, so pass false explicitly for JSON output."
41+
42+
output:
43+
json:
44+
- - meta:
45+
type: map
46+
description: Groovy Map containing sample information. e.g. `[ id:'sample1']`
47+
- "*.json":
48+
type: file
49+
description: "Annotated output in JSON format"
50+
pattern: "*.json"
51+
ontologies:
52+
- edam: http://edamontology.org/format_3464 # JSON
53+
tsv:
54+
- - meta:
55+
type: map
56+
description: Groovy Map containing sample information. e.g. `[ id:'sample1']`
57+
- "*.tsv":
58+
type: file
59+
description: "Annotated output in TSV format"
60+
pattern: "*.tsv"
61+
ontologies:
62+
- edam: http://edamontology.org/format_3475 # TSV
63+
versions_paraphrase:
64+
- - ${task.process}:
65+
type: string
66+
description: The name of the process
67+
- paraphrase:
68+
type: string
69+
description: The name of the tool
70+
- paraphrase --version | sed 's/.* //':
71+
type: eval
72+
description: The expression to obtain the version of the tool
73+
topics:
74+
versions:
75+
- - ${task.process}:
76+
type: string
77+
description: The name of the process
78+
- paraphrase:
79+
type: string
80+
description: The name of the tool
81+
- paraphrase --version | sed 's/.* //':
82+
type: eval
83+
description: The expression to obtain the version of the tool
84+
authors:
85+
- "@fellen31"
86+
maintainers:
87+
- "@fellen31"
Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
nextflow_process {
2+
3+
name "Test Process PARAPHRASE"
4+
script "../main.nf"
5+
process "PARAPHRASE"
6+
config "./nextflow.config"
7+
8+
tag "modules"
9+
tag "modules_nfcore"
10+
tag "tabix/bgzip"
11+
tag "paraphase"
12+
tag "paraphrase"
13+
14+
setup {
15+
run("TABIX_BGZIP") {
16+
script "../../tabix/bgzip/main.nf"
17+
process {
18+
"""
19+
input[0] = [
20+
[ id:'test_ref' ], // meta map
21+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr22_chr22_KI270734v1_random/sequence/genome.fa.gz', checkIfExists: true),
22+
]
23+
"""
24+
}
25+
}
26+
run("PARAPHASE") {
27+
script "../../paraphase/main.nf"
28+
process {
29+
"""
30+
input[0] = [
31+
[ id:'test', single_end:true ], // meta map
32+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/test.sorted.bam', checkIfExists: true),
33+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/test.sorted.bam.bai', checkIfExists: true),
34+
]
35+
input[1] = TABIX_BGZIP.out.output
36+
input[2] = [
37+
[:],
38+
[]
39+
]
40+
"""
41+
}
42+
}
43+
}
44+
45+
test("paraphase json - no rules") {
46+
47+
when {
48+
params {
49+
module_args = '--gene PRODH'
50+
}
51+
process {
52+
"""
53+
input[0] = PARAPHASE.out.json.map { meta, json -> [meta, json, meta.id] }
54+
input[1] = [[],[]]
55+
input[2] = false
56+
"""
57+
}
58+
}
59+
60+
then {
61+
assert process.success
62+
assertAll(
63+
{ assert snapshot(sanitizeOutput(process.out)).match() }
64+
)
65+
}
66+
67+
}
68+
69+
test("paraphase json - rules yaml") {
70+
71+
when {
72+
params {
73+
module_args = '--gene PRODH'
74+
}
75+
process {
76+
"""
77+
input[0] = PARAPHASE.out.json.map { meta, json -> [meta, json, meta.id] }
78+
input[1] = [
79+
[ id:'rules' ],
80+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/yaml/rules.yaml', checkIfExists: true)
81+
]
82+
input[2] = false
83+
"""
84+
}
85+
}
86+
87+
then {
88+
assert process.success
89+
assertAll(
90+
{ assert snapshot(sanitizeOutput(process.out)).match() }
91+
)
92+
}
93+
94+
}
95+
96+
test("paraphase json - rules yaml - tsv output") {
97+
98+
when {
99+
params {
100+
module_args = '--gene PRODH'
101+
}
102+
process {
103+
"""
104+
input[0] = PARAPHASE.out.json.map { meta, json -> [meta, json, meta.id] }
105+
input[1] = [
106+
[ id:'rules' ],
107+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/yaml/rules.yaml', checkIfExists: true)
108+
]
109+
input[2] = true
110+
"""
111+
}
112+
}
113+
114+
then {
115+
assert process.success
116+
assertAll(
117+
{ assert snapshot(sanitizeOutput(process.out)).match() }
118+
)
119+
}
120+
121+
}
122+
123+
test("paraphase json - no rules - stub") {
124+
125+
options "-stub"
126+
127+
when {
128+
params {
129+
module_args = '--gene PRODH'
130+
}
131+
process {
132+
"""
133+
input[0] = PARAPHASE.out.json.map { meta, json -> [meta, json, meta.id] }
134+
input[1] = [[],[]]
135+
input[2] = false
136+
"""
137+
}
138+
}
139+
140+
then {
141+
assert process.success
142+
assertAll(
143+
{ assert snapshot(sanitizeOutput(process.out)).match() }
144+
)
145+
}
146+
147+
}
148+
149+
test("paraphase json - rules yaml - stub") {
150+
151+
options "-stub"
152+
153+
when {
154+
params {
155+
module_args = '--gene PRODH'
156+
}
157+
process {
158+
"""
159+
input[0] = PARAPHASE.out.json.map { meta, json -> [meta, json, meta.id] }
160+
input[1] = [
161+
[ id:'rules' ],
162+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/yaml/rules.yaml', checkIfExists: true)
163+
]
164+
input[2] = false
165+
"""
166+
}
167+
}
168+
169+
then {
170+
assert process.success
171+
assertAll(
172+
{ assert snapshot(sanitizeOutput(process.out)).match() }
173+
)
174+
}
175+
176+
}
177+
178+
test("paraphase json - rules yaml - tsv output - stub") {
179+
180+
options "-stub"
181+
182+
when {
183+
params {
184+
module_args = '--gene PRODH'
185+
}
186+
process {
187+
"""
188+
input[0] = PARAPHASE.out.json.map { meta, json -> [meta, json, meta.id] }
189+
input[1] = [
190+
[ id:'rules' ],
191+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/yaml/rules.yaml', checkIfExists: true)
192+
]
193+
input[2] = true
194+
"""
195+
}
196+
}
197+
198+
then {
199+
assert process.success
200+
assertAll(
201+
{ assert snapshot(sanitizeOutput(process.out)).match() }
202+
)
203+
}
204+
205+
}
206+
}

0 commit comments

Comments
 (0)