Skip to content

Commit 2418513

Browse files
authored
Merge pull request #7919 from TomHarrop/add-tiberius
Add tiberius
2 parents 7d30fcc + 44cd6ed commit 2418513

9 files changed

Lines changed: 258 additions & 0 deletions

tools/tiberius/.shed.yml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
---
2+
categories: [Genome annotation]
3+
description: Tiberius is a tool for deep learning-based ab initio gene structure prediction
4+
homepage_url: https://github.com/Gaius-Augustus/Tiberius
5+
long_description: Tiberius is a tool for deep learning-based ab initio gene structure prediction
6+
name: tiberius
7+
owner: iuc
8+
remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/main/tools/tiberius
9+
type: unrestricted

tools/tiberius/macros.xml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
<macros>
2+
<token name="@VERSION@">2.0.3</token>
3+
<xml name="requirements">
4+
<requirements>
5+
<container type="docker">larsgabriel23/tiberius:@VERSION@</container>
6+
</requirements>
7+
</xml>
8+
<xml name="xrefs">
9+
<xrefs>
10+
<xref type="bio.tools">tiberius</xref>
11+
</xrefs>
12+
</xml>
13+
<xml name="citation">
14+
<citations>
15+
<citation type="doi">10.1093/bioinformatics/btae685</citation>
16+
</citations>
17+
</xml>
18+
</macros>
980 KB
Binary file not shown.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../tool-data/tiberius_models.loc.sample

tools/tiberius/tiberius.xml

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
<tool id="tiberius" name="Tiberius" version="@VERSION@+galaxy0" profile="24.1">
2+
<description>deep learning-based ab initio gene structure prediction</description>
3+
<macros>
4+
<import>macros.xml</import>
5+
</macros>
6+
<expand macro="xrefs"/>
7+
<expand macro="requirements"/>
8+
<command detect_errors="exit_code"><![CDATA[
9+
10+
ln -s '$genome' ./genome.fa &&
11+
12+
TF_NUM_INTRAOP_THREADS="\${GALAXY_SLOTS}"
13+
tiberius.py
14+
--genome genome.fa
15+
--model_cfg '${model_cfg.fields.model_cfg}'
16+
--out output.gtf
17+
--batch_size '$batch_size'
18+
19+
#if $model_cfg.fields.softmasking == 'False':
20+
--no_softmasking
21+
#end if
22+
23+
]]></command>
24+
<inputs>
25+
<param argument="--genome" type="data" format="fasta" label="Genome sequence file"/>
26+
<conditional name="masking">
27+
<param name="softmasked" type="select" label="Has your genome been soft-masked?" help="The Tiberius developers recommend using unmasked models. Some models that were trained on soft-masked genomes are available, but these are only appropriate for finding genes in genomes that were soft-masked in the same way as the training data.">
28+
<option value="unmasked" selected="true">No</option>
29+
<option value="softmasked">Yes</option>
30+
</param>
31+
<when value="unmasked">
32+
<param argument="--model_cfg" type="select" label="Model">
33+
<options from_data_table="tiberius_models">
34+
<filter type="static_value" value="False" column="4"/>
35+
<filter type="static_value" column="1" value="@VERSION@"/>
36+
<filter type="sort_by" column="7"/>
37+
</options>
38+
</param>
39+
</when>
40+
<when value="softmasked">
41+
<param argument="--model_cfg" type="select" label="Model">
42+
<options from_data_table="tiberius_models">
43+
<filter type="static_value" column="1" value="@VERSION@"/>
44+
<filter type="sort_by" column="7"/>
45+
</options>
46+
</param>
47+
</when>
48+
</conditional>
49+
<param argument="--batch_size" type="hidden" value="16"/>
50+
</inputs>
51+
<outputs>
52+
<data format="gtf" name="out_gtf" label="${tool.name} annotation with model ${model_cfg.fields.model_cfg}" from_work_dir="output.gtf"/>
53+
</outputs>
54+
<tests>
55+
<!-- test 1 -->
56+
<test expect_failure="true" maxseconds="15">
57+
<param name="genome" value="KV860338_truncated.fasta.gz"/>
58+
<conditional name="masking">
59+
<param name="model_cfg" value="Vertebrata version 1.1.7"/>
60+
</conditional>
61+
<param name="batch_size" value="2"/>
62+
<assert_command>
63+
<has_text text="no_softmasking"/>
64+
</assert_command>
65+
</test>
66+
<!-- test 2 -->
67+
<!-- <test expect_failure="true" maxseconds="30">
68+
<param name="genome" value="KV860338_truncated.fasta.gz"/>
69+
<conditional name="masking">
70+
<param name="softmasked" value="softmasked"/>
71+
<param name="model_cfg" value="Mammalia version 1.1.5 (Soft-masked)"/>
72+
</conditional>
73+
<param name="batch_size" value="2"/>
74+
<assert_command>
75+
<not_has_text text="no_softmasking"/>
76+
</assert_command>
77+
</test> -->
78+
</tests>
79+
<help><![CDATA[
80+
81+
`Tiberius <https://github.com/Gaius-Augustus/Tiberius>`__ is a
82+
deep learning-based ab initio gene structure prediction tool.
83+
It predicts genes from genomic sequence only.
84+
85+
To run Tiberius, you need to provide a FASTA file containing
86+
the genomic sequences and choose a pre-trained model.
87+
88+
More information about the models is available on the
89+
Tiberius GitHub
90+
`page <https://github.com/Gaius-Augustus/Tiberius/tree/main/model_cfg>`__.
91+
92+
Tiberius end-to-end integrates convolutional and
93+
long short-term memory layers with a differentiable HMM layer.
94+
It can be used to predict gene structures from genomic
95+
sequences only (ab initio), while matching the accuracy of
96+
tools that use extrinsic evidence.
97+
98+
99+
100+
]]></help>
101+
<expand macro="citation"/>
102+
</tool>
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#clamsa container_version date model_cfg softmasking target_species tiberius_version value
2+
False 2.0.3 2025-11-10 vertebrates.yaml False Vertebrata 1.1.7 Vertebrata version 1.1.7
3+
False 2.0.3 2025-05-12 mammalia_softmasking_v2.yaml True Mammalia 1.1.5 Mammalia version 1.1.5 (Soft-masked)
4+
False 2.0.3 2025-05-12 mammalia_nosofttmasking_v2.yaml False Mammalia 1.1.5 Mammalia version 1.1.5
5+
True 2.0.3 2025-05-12 mammalia_clamsa_v2.yaml True Mammalia 1.1.5 Mammalia version 1.1.5 (Soft-masked, ClaMSA)
6+
False 2.0.3 2026-02-15 insecta.yaml False Insecta 1.1.7 Insecta version 1.1.7
7+
False 2.0.3 2026-02-06 fungi.yaml False Fungi 1.1.7 Fungi version 1.1.7
8+
False 2.0.3 2026-02-06 diatoms.yaml False Bacillariophyta 1.1.8 Bacillariophyta version 1.1.8
9+
False 2.0.3 2026-01-26 chlorophyta.yaml False Chlorophyta 1.1.7 Chlorophyta version 1.1.7
10+
False 2.0.3 2025-04-15 angiosperms.yaml False Mesangiospermae 1.1.6 Mesangiospermae version 1.1.6
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
<tables>
2+
<table name="tiberius_models" comment_char="#">
3+
<columns>clamsa, container_version, date, model_cfg, softmasking, target_species, tiberius_version, value</columns>
4+
<file path="tool-data/tiberius_models.loc" />
5+
</table>
6+
</tables>
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
<tables>
2+
<table name="tiberius_models" comment_char="#">
3+
<columns>clamsa, container_version, date, model_cfg, softmasking, target_species, tiberius_version, value</columns>
4+
<file path="${__HERE__}/test-data/tiberius_models.loc" />
5+
</table>
6+
</tables>

tools/tiberius/update_loc_file.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
#!/usr/bin/env python3
2+
3+
4+
import argparse
5+
import sys
6+
import tempfile
7+
import xml.etree.ElementTree as ET
8+
from csv import DictWriter
9+
from pathlib import Path
10+
from shutil import which
11+
from subprocess import PIPE, Popen
12+
13+
from yaml import safe_load
14+
15+
# Upstream Tiberius repository; pull_repo() clones a release tag from it.
_TIBERIUS_REPO = "https://github.com/Gaius-Augustus/Tiberius"
16+
# Keys copied from each model config YAML into the loc file columns.
_YAML_KEYS = ["target_species", "tiberius_version", "date", "softmasking", "clamsa"]
17+
# Loc file written by main(); the path is relative, so it resolves against
# the current working directory.
_LOC_FILE_PATH = Path("tool-data/tiberius_models.loc.sample")
18+
19+
20+
def get_model_dict(yaml_file):
    """Read one Tiberius model config and return its loc-file fields.

    Args:
        yaml_file: Path (``str`` or ``Path``) to a model configuration
            YAML file.

    Returns:
        A dict holding every key listed in ``_YAML_KEYS`` (``None`` where
        the config does not define the key) plus ``model_cfg``, the base
        name of the YAML file.

    Raises:
        ValueError: If the file does not contain a YAML mapping (for
            example, when it is empty).
    """
    # Accept plain strings as well as Path objects; ``.name`` is needed below.
    yaml_file = Path(yaml_file)

    with open(yaml_file, "rt", encoding="utf-8") as f:
        model_config = safe_load(f)

    # safe_load returns None for an empty document and may return a list or
    # scalar; fail with a clear message instead of an AttributeError.
    if not isinstance(model_config, dict):
        raise ValueError(f"{yaml_file} does not contain a YAML mapping")

    model_dict = {k: model_config.get(k) for k in _YAML_KEYS}
    model_dict["model_cfg"] = yaml_file.name
    return model_dict
26+
27+
28+
def get_version(macros_xml):
    """Return the tool version declared in a Galaxy macros file.

    The version is the text of the top-level ``<token>`` element whose
    ``name`` attribute is ``@VERSION@``.

    Args:
        macros_xml: Path to (or open file object for) the macros XML file.

    Returns:
        The version string with surrounding whitespace removed.

    Raises:
        ValueError: If no ``@VERSION@`` token exists, or if it is empty.
    """
    tree = ET.parse(macros_xml)
    for token in tree.findall("token"):
        # Use .get() so tokens without a name attribute are skipped rather
        # than aborting the search with a KeyError.
        if token.get("name") != "@VERSION@":
            continue
        version = (token.text or "").strip()
        if not version:
            # str(None) would otherwise leak the literal "None" as a version.
            raise ValueError("token element with name @VERSION@ is empty")
        return version
    raise ValueError("No token element with name @VERSION@ found")
34+
35+
36+
def pull_repo(tool_version):
    """Clone the Tiberius release tag matching *tool_version*.

    Args:
        tool_version: Version string without the leading ``v``; the tag
            ``v<tool_version>`` of ``_TIBERIUS_REPO`` is cloned.

    Returns:
        Path of a newly created temporary directory holding the clone.
        The directory is not removed by this function.

    Raises:
        FileNotFoundError: If no ``git`` executable is found on ``PATH``.
        RuntimeError: If ``git clone`` exits with a non-zero status.
    """
    from shutil import rmtree

    git = which("git")
    if git is None:
        raise FileNotFoundError("git executable not found on PATH")

    tempdir = tempfile.mkdtemp()

    # pull the tag
    pull_command = [
        git,
        "clone",
        "--single-branch",
        "--branch",
        f"v{tool_version}",
        _TIBERIUS_REPO,
        tempdir,
    ]
    with Popen(pull_command, stdout=PIPE, text=True) as proc:
        stdout, _ = proc.communicate()

    # Relay whatever git wrote to its stdout as text, not as a bytes repr.
    if stdout:
        print(stdout, end="", file=sys.stdout)

    if proc.returncode != 0:
        # Do not hand an empty or partial clone back to the caller.
        rmtree(tempdir, ignore_errors=True)
        raise RuntimeError(
            f"git clone of {_TIBERIUS_REPO} at tag v{tool_version} "
            f"failed with exit code {proc.returncode}"
        )

    return tempdir
56+
57+
58+
def parse_arguments(argv=None):
    """Parse the command-line arguments of the loc-file updater.

    Args:
        argv: Optional list of argument strings. When ``None`` (the
            default), argparse reads ``sys.argv[1:]``.

    Returns:
        An ``argparse.Namespace`` whose ``macros_xml`` attribute is the
        ``Path`` of the macros file given on the command line.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("macros_xml", help="macros.xml file", type=Path)

    return parser.parse_args(argv)
64+
65+
66+
def make_display_name(model_dict):
    """Build the human-readable label for one model.

    The label is ``"<target_species> version <tiberius_version>"``. When
    the model is soft-masked and/or uses ClaMSA, those qualifiers are
    appended in parentheses, comma-separated, in that order.

    Args:
        model_dict: Mapping with the keys ``target_species``,
            ``tiberius_version``, ``softmasking`` and ``clamsa``.

    Returns:
        The display name string.
    """
    label = model_dict["target_species"] + f' version {model_dict["tiberius_version"]}'

    qualifiers = []
    if model_dict["softmasking"]:
        qualifiers.append("Soft-masked")
    if model_dict["clamsa"]:
        qualifiers.append("ClaMSA")

    if not qualifiers:
        return label

    return label + f' ({", ".join(qualifiers)})'
80+
81+
82+
def main():
    """Regenerate the Tiberius models loc file from the upstream repository.

    Reads the tool version from the macros file given on the command line,
    clones the matching upstream tag, turns every ``model_cfg/*.yaml`` file
    into one row, and writes the rows as a tab-separated table, preceded by
    a ``#``-prefixed header line, to ``_LOC_FILE_PATH``.

    Raises:
        RuntimeError: If the cloned repository contains no model configs;
            the existing loc file is left untouched in that case.
    """
    from shutil import rmtree

    args = parse_arguments()
    tool_version = get_version(args.macros_xml)

    tool_repo = pull_repo(tool_version)
    try:
        # Sort so the generated file does not depend on filesystem order.
        yaml_files = sorted(Path(tool_repo, "model_cfg").glob("*.yaml"))
        model_dicts = [get_model_dict(x) for x in yaml_files]
    finally:
        # The clone is only needed while reading the configs.
        rmtree(tool_repo, ignore_errors=True)

    if not model_dicts:
        raise RuntimeError(
            f"No model configs found in {_TIBERIUS_REPO} at tag v{tool_version}"
        )

    for model_dict in model_dicts:
        model_dict["value"] = make_display_name(model_dict)
        model_dict["container_version"] = tool_version

    loc_file_keys = sorted(set().union(*(d.keys() for d in model_dicts)))

    # newline="" plus an explicit lineterminator keeps every line ending in
    # "\n"; the csv default of "\r\n" would not match the header line.
    with open(_LOC_FILE_PATH, "wt", encoding="utf-8", newline="") as f:
        header = "\t".join(loc_file_keys)
        f.write(f"#{header}\n")
        fc = DictWriter(
            f,
            fieldnames=loc_file_keys,
            delimiter="\t",
            lineterminator="\n",
        )
        fc.writerows(model_dicts)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)