Skip to content

Commit d65a87e

Browse files
authored
Merge pull request #327 from softwarepub/fix/326-hermes-doi-in-plugin-metadata
Fix broken HERMES DOI in plugin marketplace
2 parents 385a119 + 7736f55 commit d65a87e

3 files changed

Lines changed: 166 additions & 3 deletions

File tree

src/hermes/commands/marketplace.py

Lines changed: 59 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,16 @@
44
# SPDX-FileContributor: Stephan Druskat
55

66
"""Basic CLI to list plugins from the Hermes marketplace."""
7+
8+
from functools import cache
79
from html.parser import HTMLParser
810
from typing import List, Optional
911

1012
import requests
1113
from pydantic import BaseModel, ConfigDict, Field
1214
from pydantic.alias_generators import to_camel
1315

14-
from hermes.utils import hermes_doi, hermes_user_agent
16+
from hermes.utils import hermes_doi, hermes_concept_doi, hermes_user_agent
1517

1618
MARKETPLACE_URL = "https://hermes.software-metadata.pub/marketplace"
1719

@@ -63,7 +65,14 @@ class SchemaOrgSoftwareApplication(SchemaOrgModel):
6365
keywords: List["str"] = None
6466

6567

66-
schema_org_hermes = SchemaOrgSoftwareApplication(id_=hermes_doi, name="hermes")
68+
# Schema.org description of HERMES itself. The identifier is always rendered as
# an absolute ``https://doi.org/`` URL, regardless of whether ``hermes_doi`` is
# stored as a bare DOI or already carries that prefix.
schema_org_hermes = SchemaOrgSoftwareApplication(
    name="hermes",
    id_="https://doi.org/" + hermes_doi.removeprefix("https://doi.org/"),
)
6776

6877

6978
class PluginMarketPlaceParser(HTMLParser):
@@ -87,6 +96,49 @@ def handle_data(self, data):
8796
self.plugins.append(plugin)
8897

8998

99+
@cache
def _doi_is_version_of_concept_doi(doi: str, concept_doi: str) -> bool:
    """Check whether ``doi`` is a version of ``concept_doi``.

    The check is performed by requesting ``doi`` from the DataCite API and checking
    whether one of its related identifiers of type ``IsVersionOf`` points to
    ``concept_doi``. This is the case if ``concept_doi`` is the concept DOI of ``doi``.

    Both arguments may be given as bare DOIs or as ``https://doi.org/`` URLs. DOIs
    are compared case-insensitively. The result is cached per argument pair, so
    each DOI is requested at most once per process.

    Returns ``False`` if DataCite does not know ``doi`` (HTTP 404) or if the record
    has no matching related identifier.

    Raises ``requests.HTTPError`` for any other unsuccessful response and
    ``requests.Timeout`` if DataCite does not answer in time.
    """
    doi_prefix = "https://doi.org/"
    doi = doi.removeprefix(doi_prefix)
    concept_doi = concept_doi.removeprefix(doi_prefix).lower()

    response = requests.get(
        f"https://api.datacite.org/dois/{doi}",
        headers={"User-Agent": hermes_user_agent},
        # Without a timeout, an unresponsive API would block the CLI forever.
        timeout=10,
    )
    # An identifier that is not registered with DataCite cannot be a version of
    # the concept DOI; this is an answer, not an error.
    if response.status_code == 404:
        return False
    response.raise_for_status()

    attributes = response.json()["data"]["attributes"]
    # ``relatedIdentifiers`` may be missing or null for records without relations.
    related_identifiers = attributes.get("relatedIdentifiers") or []

    return any(
        identifier.get("relationType") == "IsVersionOf"
        and str(identifier.get("relatedIdentifier", ""))
        .removeprefix(doi_prefix)
        .lower()
        == concept_doi
        for identifier in related_identifiers
    )
125+
126+
127+
def _is_hermes_reference(reference: Optional[SchemaOrgModel]) -> bool:
    """Figure out whether ``reference`` refers to HERMES.

    A reference counts as HERMES if its identifier is the DOI of
    ``schema_org_hermes``, the HERMES concept DOI (bare or as a
    ``https://doi.org/`` URL), or a DOI that DataCite lists as a version of the
    concept DOI. The DataCite lookup only happens if the cheap local comparisons
    do not match.

    Returns ``False`` if ``reference`` is ``None`` or has no identifier.
    """
    # Without an identifier there is nothing to compare or to look up; bail out
    # before the DataCite helper tries to treat ``None`` as a string.
    if reference is None or reference.id_ is None:
        return False

    known_hermes_ids = (
        schema_org_hermes.id_,
        hermes_concept_doi,
        f"https://doi.org/{hermes_concept_doi}",
    )
    if reference.id_ in known_hermes_ids:
        return True

    return _doi_is_version_of_concept_doi(reference.id_, hermes_concept_doi)
140+
141+
90142
def _sort_plugins_by_step(plugins: list[SchemaOrgSoftwareApplication]) -> dict[str, list[SchemaOrgSoftwareApplication]]:
91143
sorted_plugins = {k: [] for k in ["harvest", "process", "curate", "deposit", "postprocess"]}
92144
for p in plugins:
@@ -109,7 +161,11 @@ def main():
109161
)
110162

111163
def _plugin_loc(_plugin: SchemaOrgSoftwareApplication) -> str:
112-
return "builtin" if _plugin.is_part_of == schema_org_hermes else (_plugin.url or "")
164+
return (
165+
"builtin"
166+
if _is_hermes_reference(_plugin.is_part_of)
167+
else (_plugin.url or "")
168+
)
113169

114170
if parser.plugins:
115171
print()

src/hermes/utils.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,7 @@
1616
# TODO: Fetch this from somewhere
1717
hermes_doi = "10.5281/zenodo.13311079" # hermes v0.8.1
1818

19+
hermes_concept_doi = "10.5281/zenodo.13221383"
20+
"""Fix "concept" DOI that always refers to the newest version."""
21+
1922
hermes_user_agent = f"{hermes_name}/{hermes_version} ({hermes_homepage})"
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
# SPDX-FileCopyrightText: 2025 Helmholtz-Zentrum Dresden-Rossendorf
2+
# SPDX-License-Identifier: Apache-2.0
3+
# SPDX-FileContributor: David Pape
4+
5+
import requests_mock
6+
7+
from hermes.commands.marketplace import (
8+
schema_org_hermes,
9+
SchemaOrgModel,
10+
_is_hermes_reference,
11+
)
12+
13+
14+
def test_schema_org_hermes_doi_is_absolute():
    """The identifier of ``schema_org_hermes`` must be a full ``https://doi.org/`` URL."""
    assert isinstance(schema_org_hermes, SchemaOrgModel)

    hermes_id = schema_org_hermes.id_
    assert hermes_id is not None
    assert hermes_id.startswith("https://doi.org/")
19+
20+
21+
def test_concept_doi_is_hermes_reference():
    """A reference carrying the HERMES concept DOI is recognised as HERMES.

    The decision has to be made locally, without any request to DataCite.
    """
    with requests_mock.Mocker() as datacite_mock:
        concept_reference = SchemaOrgModel(
            type_="SoftwareSourceCode",
            id_="https://doi.org/10.5281/zenodo.13221383",
        )
        assert _is_hermes_reference(concept_reference)
        # No request reached the mocked DataCite API
        assert datacite_mock.call_count == 0
35+
36+
37+
def test_is_hermes_reference_if_datacite_api_returns_concept_doi_as_rel_id():
    """A DOI is a HERMES reference if DataCite reports the concept DOI as ``IsVersionOf``.

    The DataCite API is mocked to return a record for ``10.9999/fake.1000`` whose
    only related identifier points to the HERMES concept DOI.
    """
    with requests_mock.Mocker() as m:
        m.get(
            "https://api.datacite.org/dois/10.9999/fake.1000",
            text="""
                {
                  "data": {
                    "id": "10.9999/fake.1000",
                    "type": "dois",
                    "attributes": {
                      "doi": "10.9999/fake.1000",
                      "prefix": "10.9999",
                      "suffix": "fake.1000",
                      "relatedIdentifiers": [
                        {
                          "relationType": "IsVersionOf",
                          "relatedIdentifier": "10.5281/zenodo.13221383",
                          "relatedIdentifierType": "DOI"
                        }
                      ]
                    }
                  }
                }
            """.strip(),
        )
        # 10.5281/zenodo.13221383 returned from DataCite is HERMES concept DOI
        assert _is_hermes_reference(
            SchemaOrgModel(
                type_="SoftwareSourceCode", id_="https://doi.org/10.9999/fake.1000"
            )
        )
        # DataCite API was called once
        assert m.call_count == 1
70+
71+
72+
def test_not_is_hermes_reference_if_datacite_api_returns_wrong_rel_id():
    """A DOI is not a HERMES reference if its ``IsVersionOf`` target is another DOI.

    The DataCite API is mocked to return a record for ``10.9999/fake.2000`` whose
    only related identifier points to ``10.9999/fake.1999``.
    """
    with requests_mock.Mocker() as m:
        m.get(
            "https://api.datacite.org/dois/10.9999/fake.2000",
            text="""
                {
                  "data": {
                    "id": "10.9999/fake.2000",
                    "type": "dois",
                    "attributes": {
                      "doi": "10.9999/fake.2000",
                      "prefix": "10.9999",
                      "suffix": "fake.2000",
                      "relatedIdentifiers": [
                        {
                          "relationType": "IsVersionOf",
                          "relatedIdentifier": "10.9999/fake.1999",
                          "relatedIdentifierType": "DOI"
                        }
                      ]
                    }
                  }
                }
            """.strip(),
        )
        # 10.9999/fake.1999 returned from DataCite is not HERMES concept DOI
        assert not _is_hermes_reference(
            SchemaOrgModel(
                type_="SoftwareSourceCode", id_="https://doi.org/10.9999/fake.2000"
            )
        )
        # DataCite API was called once
        assert m.call_count == 1

0 commit comments

Comments
 (0)