Skip to content

Commit 0644d4e

Browse files
committed
add petab export
1 parent f4f3a5f commit 0644d4e

18 files changed

Lines changed: 6081 additions & 0 deletions

pyenzyme/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
# Output functions
2121
to_pandas = EnzymeMLHandler.to_pandas
2222
to_sbml = EnzymeMLHandler.to_sbml
23+
to_petab = EnzymeMLHandler.to_petab
2324
write_enzymeml = EnzymeMLHandler.write_enzymeml
2425

2526
__all__ = [

pyenzyme/petab/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from .io import to_petab
2+
3+
__all__ = ["to_petab"]

pyenzyme/petab/baserow.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
from typing import Any
2+
from pydantic import BaseModel, ConfigDict
3+
4+
5+
class BaseRow(BaseModel):
    """
    Shared base for the pydantic models that describe a single PEtab table row.
    """

    # Populate enum-typed fields with the enum's value instead of the member.
    model_config = ConfigDict(use_enum_values=True)

    def to_row(self) -> dict[str, Any]:
        """
        Serialize this row into a plain dictionary for a PEtab table.

        Returns:
            The JSON-mode dump of the model, keyed by field alias.
        """
        dumped = self.model_dump(mode="json", by_alias=True)
        return dumped

pyenzyme/petab/conditions.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
from typing import Dict, Self, Any
2+
from pydantic import Field
3+
4+
from pyenzyme.versions import v2
5+
from .baserow import BaseRow
6+
7+
8+
class ConditionRow(BaseRow):
    """
    One row of a PEtab conditions table.

    A row carries the condition identifier and name, plus one additional
    column per species holding that species' initial value.

    Attributes:
        condition_id: Identifier of the condition (alias ``conditionId``)
        condition_name: Name of the condition (alias ``conditionName``)
        species: Mapping of species ID to its initial value
    """

    condition_id: str = Field(alias="conditionId")
    condition_name: str = Field(alias="conditionName")
    species: dict[str, float] = Field(default_factory=dict)

    def to_row(self) -> Dict[str, Any]:
        """
        Flatten the condition into a single table row.

        Returns:
            Dictionary holding the aliased ID and name fields followed by
            one entry per species (species ID -> initial value)
        """
        # Dump everything except the nested mapping, then splice the species
        # entries in as top-level columns.
        flat_row = self.model_dump(by_alias=True, exclude={"species"})
        flat_row.update(self.species)
        return flat_row

    @classmethod
    def from_measurements(cls, measurements: list[v2.Measurement]) -> list[Self]:
        """
        Build one ConditionRow per Measurement, preserving the input order.
        """
        return list(map(cls.from_measurement, measurements))

    @classmethod
    def from_measurement(cls, measurement: v2.Measurement) -> Self:
        """
        Build a ConditionRow from a single PyEnzyme Measurement.

        The measurement's ID and name become the condition ID and name. Every
        species data entry that defines an initial value contributes one
        species column; entries whose initial value is None are left out.

        Args:
            measurement: PyEnzyme Measurement object containing species data

        Returns:
            A new ConditionRow populated from the measurement
        """
        initial_values = {}
        for entry in measurement.species_data:
            if entry.initial is None:
                continue
            initial_values[str(entry.species_id)] = entry.initial

        return cls(
            conditionId=measurement.id,
            conditionName=measurement.name,
            species=initial_values,
        )

pyenzyme/petab/io.py

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
from pathlib import Path
2+
from typing import Union
3+
4+
import pandas as pd
5+
import yaml
6+
7+
from pyenzyme.sbml.serializer import to_sbml
8+
from pyenzyme.versions import v2
9+
10+
from .petab import PEtab, Problem
11+
from .conditions import ConditionRow
12+
from .observables import ObservableRow
13+
from .measurements import MeasurementRow
14+
from .parameters import ParameterRow
15+
16+
# Default filename suffixes for the PEtab components; to_petab prefixes each with the normalized document name
17+
PARAMETER_FILENAME = "parameters.tsv"
18+
CONDITION_FILENAME = "conditions.tsv"
19+
OBSERVABLE_FILENAME = "observables.tsv"
20+
MEASUREMENT_FILENAME = "measurements.tsv"
21+
SBML_FILENAME = "model.xml"
22+
23+
24+
def to_petab(doc: v2.EnzymeMLDocument, path: Union[Path, str]) -> PEtab:
    """
    Convert an EnzymeML document to a PEtab parameter estimation problem.

    This function exports an EnzymeML document to the PEtab format, which is a
    standardized format for specifying parameter estimation problems in systems biology.
    The function creates all necessary PEtab files:

    1. SBML model file: Contains the mathematical model specification
    2. Condition table: Specifies experimental conditions
    3. Observable table: Defines model outputs that correspond to measurements
    4. Measurement table: Contains experimental data points
    5. Parameter table: Defines model parameters and their estimation settings
    6. YAML configuration file: Links all files together in a PEtab problem definition

    Parameters
    ----------
    doc : v2.EnzymeMLDocument
        The EnzymeML document to convert, containing all model information,
        measurements, and parameters.
    path : Union[Path, str]
        Directory path where PEtab files will be written. If the directory
        doesn't exist, it will be created (including missing parents).

    Returns
    -------
    PEtab
        The PEtab configuration object that was serialized to the YAML file.
        All files are written to the specified path as a side effect.

    Notes
    -----
    The file naming convention is based on the EnzymeML document name,
    with spaces replaced by underscores and converted to lowercase.
    """

    path = Path(path)

    # exist_ok avoids the check-then-create race of a separate exists() test.
    path.mkdir(parents=True, exist_ok=True)

    # Create paths for all PEtab files
    name = doc.name.replace(" ", "_").lower()
    meta_path = path / f"{name}.yaml"
    condition_path = path / f"{name}_{CONDITION_FILENAME}"
    observable_path = path / f"{name}_{OBSERVABLE_FILENAME}"
    measurement_path = path / f"{name}_{MEASUREMENT_FILENAME}"
    parameter_path = path / f"{name}_{PARAMETER_FILENAME}"
    sbml_path = path / f"{name}_{SBML_FILENAME}"

    # Serialize the model before opening the target so that a failing
    # conversion does not leave an empty SBML file behind.
    sbml, _ = to_sbml(doc)

    # Write SBML model file with an explicit encoding instead of the
    # platform default.
    with open(sbml_path, "w", encoding="utf-8") as f:
        f.write(sbml)

    # Generate and write conditions table
    pd.DataFrame(
        [row.to_row() for row in ConditionRow.from_measurements(doc.measurements)],
    ).to_csv(condition_path, index=False, sep="\t")

    # Generate and write observables table
    pd.DataFrame(
        [row.to_row() for row in ObservableRow.from_enzymeml(doc)],
    ).to_csv(observable_path, index=False, sep="\t")

    # Generate and write measurements table
    pd.DataFrame(
        [row.to_row() for row in MeasurementRow.from_measurements(doc.measurements)],
    ).to_csv(measurement_path, index=False, sep="\t")

    # Generate and write parameters table
    pd.DataFrame(
        [row.to_row() for row in ParameterRow.from_parameters(doc.parameters)],
    ).to_csv(parameter_path, index=False, sep="\t")

    # Create PEtab configuration object
    # NOTE(review): the paths stored here include the output directory. If
    # PEtab tooling resolves relative paths against the YAML file's location,
    # a relative `path` may not resolve correctly -- confirm against the
    # PEtab specification and the PEtab/Problem models.
    meta = PEtab(
        format_version=1,
        parameter_file=parameter_path,
        problems=[
            Problem(
                sbml_files=[sbml_path],
                measurement_files=[measurement_path],
                condition_files=[condition_path],
                observable_files=[observable_path],
            )
        ],
    )

    # Serialize configuration to YAML
    with open(meta_path, "w", encoding="utf-8") as f:
        yaml.dump(
            meta.model_dump(
                mode="json",
                by_alias=True,
                exclude_none=True,
            ),
            f,
        )

    return meta

pyenzyme/petab/measurements.py

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
from typing import List, Self, Union
2+
from pydantic import Field
3+
4+
from pyenzyme.versions import v2
5+
from .baserow import BaseRow
6+
7+
8+
class MeasurementRow(BaseRow):
    """
    One row of a PEtab measurements table.

    Each row holds a single measured value of one observable at one time
    point under one simulation condition.

    Attributes:
        observable_id (str): Identifier of the observable being measured
            (alias 'observableId').
        preequilibration_condition_id (str | None): Identifier of the
            preequilibration condition (alias 'preequilibrationConditionId').
            Defaults to None.
        condition_id (str): Identifier of the simulation condition
            (alias 'simulationConditionId').
        measurement (float): The measured value (alias 'measurement').
        time (float): Time point of the measurement (alias 'time').
        observable_parameters (Union[str, float, None]): Observable
            parameters (alias 'observableParameters'). Defaults to None.
        noise_parameters (Union[str, float, None]): Noise model parameters
            (alias 'noiseParameters'). Defaults to None.
        dataset_id (str | None): Identifier of the dataset the measurement
            belongs to (alias 'datasetId'). Defaults to None.
        replicate_id (str | None): Identifier of the replicate the
            measurement belongs to (alias 'replicateId'). Defaults to None.
    """

    observable_id: str = Field(alias="observableId")
    preequilibration_condition_id: str | None = Field(default=None, alias="preequilibrationConditionId")
    condition_id: str = Field(alias="simulationConditionId")
    measurement: float = Field(alias="measurement")
    time: float = Field(alias="time")
    observable_parameters: Union[str, float, None] = Field(default=None, alias="observableParameters")
    noise_parameters: Union[str, float, None] = Field(default=None, alias="noiseParameters")
    dataset_id: str | None = Field(alias="datasetId", default=None)
    replicate_id: str | None = Field(alias="replicateId", default=None)

    @classmethod
    def from_measurements(cls, measurements: list[v2.Measurement]) -> List[Self]:
        """
        Flatten a list of EnzymeML Measurement objects into MeasurementRow objects.

        Rows are emitted measurement by measurement, in input order.
        """
        collected = []
        for measurement in measurements:
            collected.extend(cls.from_measurement(measurement))
        return collected

    @classmethod
    def from_measurement(cls, measurement: v2.Measurement) -> List[Self]:
        """
        Expand one EnzymeML Measurement into one MeasurementRow per data point.

        For every species data entry of the measurement, the time and data
        sequences are paired element by element; pairing stops at the end of
        the shorter sequence.

        Args:
            measurement (v2.Measurement): Measurement whose species data
                entries hold the time series to export.

        Returns:
            List[Self]: One row per (time, value) pair. The species ID is
            used as the observable ID and the measurement ID as the
            simulation condition ID.

        Example:
            A species 'S1' with time points [0, 10, 20] and values
            [1.0, 0.8, 0.6] yields three rows, one per time-value pair.
        """
        return [
            cls(
                observableId=series.species_id,
                simulationConditionId=measurement.id,
                measurement=value,
                time=timepoint,
            )
            for series in measurement.species_data
            for timepoint, value in zip(series.time, series.data)
        ]

0 commit comments

Comments
 (0)