Skip to content

Commit d50c945

Browse files
committed
Add robust Dataverse version parsing and tests
Allow parsing of Dataverse version strings with trailing descriptors by relaxing `VERSION_PATTERN` and adding `_extract_major_version(version_string)`. The helper extracts the major version, or raises a `ValueError` when the string does not start with a version number; `Dataverse` now uses this helper to enforce `MINIMUM_MAJOR_VERSION`. Add unit tests (`tests/dataverse/test_version_parsing.py`) covering accepted formats (e.g., "6.10", "v6.10.1", "6.10 bugfixes") and invalid inputs.
1 parent 63b670d commit d50c945

2 files changed

Lines changed: 60 additions & 11 deletions

File tree

pyDataverse/dataverse/dataverse.py

Lines changed: 31 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,37 @@
3838
from .collection import Collection
3939
from .dataset import Dataset
4040

41-
VERSION_PATTERN = re.compile(r"^v?(\d+)(?:\.\d+)*(?:-.*)?$")
41+
# Anchored only at the start: an optional "v", the major number (group 1), then
# any ".N" components. There is deliberately no "$", so text after the numeric
# prefix (e.g. "-SNAPSHOT" or " bugfixes") does not prevent a match.
VERSION_PATTERN = re.compile(r"^v?(\d+)(?:\.\d+)*")
4242
MINIMUM_MAJOR_VERSION = 6
4343

44+
45+
def _extract_major_version(version_string: str) -> int:
    """
    Return the major version number found at the start of a version string.

    Surrounding whitespace is stripped before matching. Only the beginning of
    the string has to look like a version, so canonical forms (``6.10``,
    ``v6.10.1``, ``6.10.1-SNAPSHOT``) and forms followed by a free-text
    descriptor (``6.10 bugfixes``) are all accepted.

    Args:
        version_string: Raw version string returned by the Dataverse API.

    Returns:
        int: The major version, i.e. the first run of digits after an
        optional leading ``v``.

    Raises:
        ValueError: If the stripped string does not begin with a version number.
    """
    parsed = VERSION_PATTERN.match(version_string.strip())

    # Happy path first: group 1 holds the digits of the major component.
    if parsed is not None:
        return int(parsed.group(1))

    # The message reports the caller's original (unstripped) input.
    raise ValueError(
        f"Unable to parse version string: {version_string}. "
        "Expected to start with: vX.Y.Z, X.Y, X.Y.Z-SUFFIX, or X.Y <description>"
    )
71+
4472
# Used for autocomplete in the create_dataset method
4573
Subject = Literal[
4674
"Agricultural Sciences",
@@ -183,17 +211,9 @@ def _check_api_version(self) -> None:
183211
ValueError: If the Dataverse instance version is below the minimum required version.
184212
"""
185213
version = self.native_api.get_info_version()
186-
match = VERSION_PATTERN.match(version.version)
187-
188-
if match is None:
189-
raise ValueError(
190-
f"Unable to parse version string: {version.version}. "
191-
f"Expected format: vX.Y.Z or X.Y or X.Y.Z-SUFFIX"
192-
)
193-
194-
major_version_str = match.group(1)
214+
major_version = _extract_major_version(version.version)
195215

196-
if int(major_version_str) < MINIMUM_MAJOR_VERSION:
216+
if major_version < MINIMUM_MAJOR_VERSION:
197217
raise ValueError(
198218
f"Dataverse version {version.version} is below the minimum required "
199219
f"version {MINIMUM_MAJOR_VERSION}. Please upgrade your Dataverse instance."
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
"""Unit tests for Dataverse version parsing."""
2+
3+
import pytest
4+
5+
from pyDataverse.dataverse.dataverse import _extract_major_version
6+
7+
8+
@pytest.mark.parametrize(
    "version_string, expected_major",
    [
        ("6.10", 6),
        ("v6.10.1", 6),
        ("6.10.1-SNAPSHOT", 6),
        ("6.10 bugfixes", 6),
        (" 7.2 custom-build ", 7),
    ],
)
def test_extract_major_version_accepts_known_formats(
    version_string: str, expected_major: int
) -> None:
    """Check that the major version is parsed from every supported format.

    The cases cover a plain version, a ``v`` prefix, a dash suffix, a trailing
    free-text descriptor, and input padded with whitespace.
    """
    parsed_major = _extract_major_version(version_string)

    assert parsed_major == expected_major
23+
24+
25+
@pytest.mark.parametrize("version_string", ["", "bugfixes 6.10", "release-v6.10"])
def test_extract_major_version_rejects_invalid_prefix(version_string: str) -> None:
    """Check that strings not beginning with a version number raise ValueError.

    The cases are an empty string and two strings where the version number
    appears only after other leading text.
    """
    # pytest.raises treats ``match`` as a regex searched in the error message.
    expected_message = "Unable to parse version string"

    with pytest.raises(ValueError, match=expected_message):
        _extract_major_version(version_string)

0 commit comments

Comments
 (0)