diff --git a/modules/nf-core/paraphrase/environment.yml b/modules/nf-core/paraphrase/environment.yml
new file mode 100644
index 000000000000..2a2259bbe679
--- /dev/null
+++ b/modules/nf-core/paraphrase/environment.yml
@@ -0,0 +1,10 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - conda-forge::pip==26.0.1
+  - conda-forge::python=3.14.3
+  - pip:
+      - paraphrase==0.2.0
diff --git a/modules/nf-core/paraphrase/main.nf b/modules/nf-core/paraphrase/main.nf
new file mode 100644
index 000000000000..d9c7b161ac75
--- /dev/null
+++ b/modules/nf-core/paraphrase/main.nf
@@ -0,0 +1,52 @@
+process PARAPHRASE {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/bc/bc177b8e7e1d9bbdcbb64a6ad630c7ecc63a2229ce2b219408888bc2bb34cac3/data':
+        'community.wave.seqera.io/library/pip_paraphrase:59a4576966ee5f0b' }"
+
+    input:
+    tuple val(meta), path(jsons), val(samples)
+    tuple val(meta2), path(yaml)
+    val(tsv_output)
+
+    output:
+    tuple val(meta), path("*.json"), emit: json, optional: true
+    tuple val(meta), path("*.tsv"), emit: tsv, optional: true
+
+    tuple val("${task.process}"), val('paraphrase'), eval("paraphrase --version | sed 's/.* //'"), topic: versions, emit: versions_paraphrase
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def rules = yaml ? "--rules $yaml" : ''
+    def output_format = tsv_output ? 'tsv' : 'json'
+    // A single staged file or a single sample name arrives as a scalar, not a list;
+    // wrap scalars so join() always builds one --input/--sample flag per value.
+    def json_list = jsons instanceof List ? jsons : [jsons]
+    def sample_list = samples instanceof List ? samples : [samples]
+    """
+    paraphrase \
+        $args \
+        --input ${json_list.join(' --input ')} \
+        --sample ${sample_list.join(' --sample ')} \
+        --output-format=${output_format} \
+        $rules \
+        > ${prefix}.${output_format}
+    """
+
+    stub:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def output_format = tsv_output ? 'tsv' : 'json'
+    """
+    echo $args
+
+    touch ${prefix}.${output_format}
+    """
+}
diff --git a/modules/nf-core/paraphrase/meta.yml b/modules/nf-core/paraphrase/meta.yml
new file mode 100644
index 000000000000..5e4f2a3c2289
--- /dev/null
+++ b/modules/nf-core/paraphrase/meta.yml
@@ -0,0 +1,87 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "paraphrase"
+description: Parse and annotate Paraphase JSONs
+keywords:
+  - long-read
+  - paraphrase
+  - annotate
+tools:
+  - "paraphrase":
+      description: "Paraphase JSON parser"
+      homepage: "https://github.com/Clinical-Genomics/paraphrase"
+      documentation: "https://github.com/Clinical-Genomics/paraphrase#readme"
+      tool_dev_url: "https://github.com/Clinical-Genomics/paraphrase"
+      licence: ["MIT"]
+
+input:
+  - - meta:
+        type: map
+        description: Groovy Map containing sample information. e.g. `[ id:'sample1']`
+    - jsons:
+        type: file
+        description: "One or more JSON files from paraphase"
+        pattern: "*.json"
+        ontologies:
+          - edam: http://edamontology.org/format_3464 # JSON
+    - samples:
+        type: list
+        description: "Sample names corresponding to the JSON files. Must be in the same order as the JSON files."
+  - - meta2:
+        type: map
+        description: Groovy Map containing reference information.
+    - yaml:
+        type: file
+        description: "YAML file containing rules for annotation"
+        pattern: "*.yaml"
+        ontologies:
+          - edam: http://edamontology.org/format_3473 # YAML
+  - tsv_output:
+      type: boolean
+      description: "Whether to output in TSV format instead of JSON. Default is false (JSON output)."
+
+output:
+  json:
+    - - meta:
+          type: map
+          description: Groovy Map containing sample information. e.g. `[ id:'sample1']`
+      - "*.json":
+          type: file
+          description: "Annotated output in JSON format"
+          pattern: "*.json"
+          ontologies:
+            - edam: http://edamontology.org/format_3464 # JSON
+  tsv:
+    - - meta:
+          type: map
+          description: Groovy Map containing sample information. e.g. `[ id:'sample1']`
+      - "*.tsv":
+          type: file
+          description: "Annotated output in TSV format"
+          pattern: "*.tsv"
+          ontologies:
+            - edam: http://edamontology.org/format_3475 # TSV
+  versions_paraphrase:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - paraphrase:
+          type: string
+          description: The name of the tool
+      - paraphrase --version | sed 's/.* //':
+          type: eval
+          description: The expression to obtain the version of the tool
+topics:
+  versions:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - paraphrase:
+          type: string
+          description: The name of the tool
+      - paraphrase --version | sed 's/.* //':
+          type: eval
+          description: The expression to obtain the version of the tool
+authors:
+  - "@fellen31"
+maintainers:
+  - "@fellen31"
diff --git a/modules/nf-core/paraphrase/tests/main.nf.test b/modules/nf-core/paraphrase/tests/main.nf.test
new file mode 100644
index 000000000000..8f1494d1b2d6
--- /dev/null
+++ b/modules/nf-core/paraphrase/tests/main.nf.test
@@ -0,0 +1,206 @@
+nextflow_process {
+
+    name "Test Process PARAPHRASE"
+    script "../main.nf"
+    process "PARAPHRASE"
+    config "./nextflow.config"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "tabix/bgzip"
+    tag "paraphase"
+    tag "paraphrase"
+
+    setup {
+        run("TABIX_BGZIP") {
+            script "../../tabix/bgzip/main.nf"
+            process {
+                """
+                input[0] = [
+                    [ id:'test_ref' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr22_chr22_KI270734v1_random/sequence/genome.fa.gz', checkIfExists: true),
+                ]
+                """
+            }
+        }
+        run("PARAPHASE") {
+            script "../../paraphase/main.nf"
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:true ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/test.sorted.bam', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/test.sorted.bam.bai', checkIfExists: true),
+                ]
+                input[1] = TABIX_BGZIP.out.output
+                input[2] = [
+                    [:],
+                    []
+                ]
+                """
+            }
+        }
+    }
+
+    test("paraphase json - no rules") {
+
+        when {
+            params {
+                module_args = '--gene PRODH'
+            }
+            process {
+                """
+                input[0] = PARAPHASE.out.json.map { meta, json -> [meta, json, meta.id] }
+                input[1] = [[],[]]
+                input[2] = false
+                """
+            }
+        }
+
+        then {
+            assert process.success
+            assertAll(
+                { assert snapshot(sanitizeOutput(process.out)).match() }
+            )
+        }
+
+    }
+
+    test("paraphase json - rules yaml") {
+
+        when {
+            params {
+                module_args = '--gene PRODH'
+            }
+            process {
+                """
+                input[0] = PARAPHASE.out.json.map { meta, json -> [meta, json, meta.id] }
+                input[1] = [
+                    [ id:'rules' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/yaml/rules.yaml', checkIfExists: true)
+                ]
+                input[2] = false
+                """
+            }
+        }
+
+        then {
+            assert process.success
+            assertAll(
+                { assert snapshot(sanitizeOutput(process.out)).match() }
+            )
+        }
+
+    }
+
+    test("paraphase json - rules yaml - tsv output") {
+
+        when {
+            params {
+                module_args = '--gene PRODH'
+            }
+            process {
+                """
+                input[0] = PARAPHASE.out.json.map { meta, json -> [meta, json, meta.id] }
+                input[1] = [
+                    [ id:'rules' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/yaml/rules.yaml', checkIfExists: true)
+                ]
+                input[2] = true
+                """
+            }
+        }
+
+        then {
+            assert process.success
+            assertAll(
+                { assert snapshot(sanitizeOutput(process.out)).match() }
+            )
+        }
+
+    }
+
+    test("paraphase json - no rules - stub") {
+
+        options "-stub"
+
+        when {
+            params {
+                module_args = '--gene PRODH'
+            }
+            process {
+                """
+                input[0] = PARAPHASE.out.json.map { meta, json -> [meta, json, meta.id] }
+                input[1] = [[],[]]
+                input[2] = false
+                """
+            }
+        }
+
+        then {
+            assert process.success
+            assertAll(
+                { assert snapshot(sanitizeOutput(process.out)).match() }
+            )
+        }
+
+    }
+
+    test("paraphase json - rules yaml - stub") {
+
+        options "-stub"
+
+        when {
+            params {
+                module_args = '--gene PRODH'
+            }
+            process {
+                """
+                input[0] = PARAPHASE.out.json.map { meta, json -> [meta, json, meta.id] }
+                input[1] = [
+                    [ id:'rules' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/yaml/rules.yaml', checkIfExists: true)
+                ]
+                input[2] = false
+                """
+            }
+        }
+
+        then {
+            assert process.success
+            assertAll(
+                { assert snapshot(sanitizeOutput(process.out)).match() }
+            )
+        }
+
+    }
+
+    test("paraphase json - rules yaml - tsv output - stub") {
+
+        options "-stub"
+
+        when {
+            params {
+                module_args = '--gene PRODH'
+            }
+            process {
+                """
+                input[0] = PARAPHASE.out.json.map { meta, json -> [meta, json, meta.id] }
+                input[1] = [
+                    [ id:'rules' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/yaml/rules.yaml', checkIfExists: true)
+                ]
+                input[2] = true
+                """
+            }
+        }
+
+        then {
+            assert process.success
+            assertAll(
+                { assert snapshot(sanitizeOutput(process.out)).match() }
+            )
+        }
+
+    }
+}
diff --git a/modules/nf-core/paraphrase/tests/main.nf.test.snap b/modules/nf-core/paraphrase/tests/main.nf.test.snap
new file mode 100644
index 000000000000..17efa7fd4849
--- /dev/null
+++ b/modules/nf-core/paraphrase/tests/main.nf.test.snap
@@ -0,0 +1,182 @@
+{
+    "paraphase json - rules yaml": {
+        "content": [
+            {
+                "json": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.json:md5,27db48ae0e6960221fc7b224711a5373"
+                    ]
+                ],
+                "tsv": [
+
+                ],
+                "versions_paraphrase": [
+                    [
+                        "PARAPHRASE",
+                        "paraphrase",
+                        "0.2.0"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-02-27T13:40:26.241158374",
+        "meta": {
+            "nf-test": "0.9.4:md5,3b1b0b457e32f7b2c51c0cf5658dd1cf",
+            "nextflow": "26.01.1"
+        }
+    },
+    "paraphase json - no rules": {
+        "content": [
+            {
+                "json": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.json:md5,6b289a071010d6cb0253282ce5e0fd94"
+                    ]
+                ],
+                "tsv": [
+
+                ],
+                "versions_paraphrase": [
+                    [
+                        "PARAPHRASE",
+                        "paraphrase",
+                        "0.2.0"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-02-27T13:40:14.980861669",
+        "meta": {
+            "nf-test": "0.9.4:md5,3b1b0b457e32f7b2c51c0cf5658dd1cf",
+            "nextflow": "26.01.1"
+        }
+    },
+    "paraphase json - no rules - stub": {
+        "content": [
+            {
+                "json": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.json:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "tsv": [
+
+                ],
+                "versions_paraphrase": [
+                    [
+                        "PARAPHRASE",
+                        "paraphrase",
+                        "0.2.0"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-02-27T13:40:44.559269452",
+        "meta": {
+            "nf-test": "0.9.4:md5,3b1b0b457e32f7b2c51c0cf5658dd1cf",
+            "nextflow": "26.01.1"
+        }
+    },
+    "paraphase json - rules yaml - tsv output - stub": {
+        "content": [
+            {
+                "json": [
+
+                ],
+                "tsv": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions_paraphrase": [
+                    [
+                        "PARAPHRASE",
+                        "paraphrase",
+                        "0.2.0"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-02-27T13:40:58.558739068",
+        "meta": {
+            "nf-test": "0.9.4:md5,3b1b0b457e32f7b2c51c0cf5658dd1cf",
+            "nextflow": "26.01.1"
+        }
+    },
+    "paraphase json - rules yaml - stub": {
+        "content": [
+            {
+                "json": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.json:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "tsv": [
+
+                ],
+                "versions_paraphrase": [
+                    [
+                        "PARAPHRASE",
+                        "paraphrase",
+                        "0.2.0"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-02-27T13:40:51.544118803",
+        "meta": {
+            "nf-test": "0.9.4:md5,3b1b0b457e32f7b2c51c0cf5658dd1cf",
+            "nextflow": "26.01.1"
+        }
+    },
+    "paraphase json - rules yaml - tsv output": {
+        "content": [
+            {
+                "json": [
+
+                ],
+                "tsv": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.tsv:md5,d7f2ca2e7a3de5adf3e322fa0d43a6d6"
+                    ]
+                ],
+                "versions_paraphrase": [
+                    [
+                        "PARAPHRASE",
+                        "paraphrase",
+                        "0.2.0"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-02-27T13:40:37.597570496",
+        "meta": {
+            "nf-test": "0.9.4:md5,3b1b0b457e32f7b2c51c0cf5658dd1cf",
+            "nextflow": "26.01.1"
+        }
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/paraphrase/tests/nextflow.config b/modules/nf-core/paraphrase/tests/nextflow.config
new file mode 100644
index 000000000000..86c8071de42e
--- /dev/null
+++ b/modules/nf-core/paraphrase/tests/nextflow.config
@@ -0,0 +1,6 @@
+process {
+    // NOTE: this selector names the upstream PARAPHASE process run in the test setup, not PARAPHRASE.
+    withName: 'PARAPHASE' {
+        ext.args = params.module_args
+    }
+}