44# SPDX-FileContributor: Stephan Druskat
55
66"""Basic CLI to list plugins from the Hermes marketplace."""
7+
8+ from functools import cache
79from html .parser import HTMLParser
810from typing import List , Optional
911
1012import requests
1113from pydantic import BaseModel , ConfigDict , Field
1214from pydantic .alias_generators import to_camel
1315
14- from hermes .utils import hermes_doi , hermes_user_agent
16+ from hermes .utils import hermes_doi , hermes_concept_doi , hermes_user_agent
1517
1618MARKETPLACE_URL = "https://hermes.software-metadata.pub/marketplace"
1719
@@ -63,7 +65,14 @@ class SchemaOrgSoftwareApplication(SchemaOrgModel):
6365 keywords : List ["str" ] = None
6466
6567
66- schema_org_hermes = SchemaOrgSoftwareApplication (id_ = hermes_doi , name = "hermes" )
# Canonical schema.org description of HERMES itself. Its identifier is always
# the URL form of the HERMES DOI: stripping an already-present
# ``https://doi.org/`` prefix before re-adding it covers both the bare-DOI and
# the URL spelling of ``hermes_doi``.
schema_org_hermes = SchemaOrgSoftwareApplication(
    name="hermes",
    id_=f"https://doi.org/{hermes_doi.removeprefix('https://doi.org/')}",
)
6776
6877
6978class PluginMarketPlaceParser (HTMLParser ):
@@ -87,6 +96,49 @@ def handle_data(self, data):
8796 self .plugins .append (plugin )
8897
8998
@cache
def _doi_is_version_of_concept_doi(doi: str, concept_doi: str) -> bool:
    """Check whether ``doi`` is a version of ``concept_doi``.

    The check is performed by requesting ``doi`` from the DataCite API and checking
    whether one of its related identifiers of type ``IsVersionOf`` points to
    ``concept_doi``. This is the case if ``concept_doi`` is the concept DOI of ``doi``.

    Both arguments may be given either as bare DOIs or as ``https://doi.org/`` URLs.
    DOIs are compared case-insensitively.

    The check is best-effort: if the record cannot be retrieved (network error,
    timeout, HTTP error status such as 404 for an identifier unknown to DataCite)
    or does not have the expected structure, the relation cannot be confirmed and
    ``False`` is returned instead of raising.

    Results are memoised per ``(doi, concept_doi)`` pair for the lifetime of the
    process, so each DOI is looked up at most once.

    :param doi: The DOI to look up.
    :param concept_doi: The concept DOI that ``doi`` is expected to be a version of.
    :return: ``True`` if DataCite lists ``concept_doi`` as an ``IsVersionOf`` related
        identifier of ``doi``, ``False`` otherwise.
    """
    doi_url_prefix = "https://doi.org/"

    doi = doi.removeprefix(doi_url_prefix)
    concept_doi = concept_doi.removeprefix(doi_url_prefix).lower()

    try:
        response = requests.get(
            f"https://api.datacite.org/dois/{doi}",
            headers={"User-Agent": hermes_user_agent},
            # Without a timeout a stalled connection would block the CLI forever.
            timeout=10,
        )
        response.raise_for_status()
        payload = response.json()
    except (requests.RequestException, ValueError):
        # Unreachable API, error status or a body that is not JSON.
        return False

    try:
        related_identifiers = payload["data"]["attributes"]["relatedIdentifiers"] or []
    except (KeyError, TypeError):
        # Response is not shaped like a single DataCite DOI record.
        return False

    return any(
        isinstance(identifier, dict)
        and identifier.get("relationType") == "IsVersionOf"
        # The related identifier may be a bare DOI or its URL form, in any case.
        and str(identifier.get("relatedIdentifier", ""))
        .removeprefix(doi_url_prefix)
        .lower()
        == concept_doi
        for identifier in related_identifiers
    )
125+
126+
def _is_hermes_reference(reference: Optional[SchemaOrgModel]) -> bool:
    """Figure out whether ``reference`` refers to HERMES.

    A reference is considered to point to HERMES if its identifier is

    - the identifier of ``schema_org_hermes``,
    - the HERMES concept DOI, either bare or as a ``https://doi.org/`` URL, or
    - a DOI that is registered as a version of the HERMES concept DOI.

    :param reference: The referenced schema.org object, or ``None``.
    :return: ``True`` if ``reference`` identifies HERMES, ``False`` otherwise
        (including when there is no reference or it carries no identifier).
    """
    if reference is None:
        return False

    reference_id = reference.id_
    if not reference_id:
        # Nothing to compare or look up; passing an unset identifier on to the
        # DOI lookup would fail there instead of answering "no".
        return False

    known_hermes_ids = (
        schema_org_hermes.id_,
        hermes_concept_doi,
        f"https://doi.org/{hermes_concept_doi}",
    )
    if reference_id in known_hermes_ids:
        return True

    # Not one of the well-known identifiers: check whether it is a versioned DOI
    # that belongs to the HERMES concept DOI.
    return _doi_is_version_of_concept_doi(reference_id, hermes_concept_doi)
140+
141+
90142def _sort_plugins_by_step (plugins : list [SchemaOrgSoftwareApplication ]) -> dict [str , list [SchemaOrgSoftwareApplication ]]:
91143 sorted_plugins = {k : [] for k in ["harvest" , "process" , "curate" , "deposit" , "postprocess" ]}
92144 for p in plugins :
@@ -109,7 +161,11 @@ def main():
109161 )
110162
111163 def _plugin_loc (_plugin : SchemaOrgSoftwareApplication ) -> str :
112- return "builtin" if _plugin .is_part_of == schema_org_hermes else (_plugin .url or "" )
164+ return (
165+ "builtin"
166+ if _is_hermes_reference (_plugin .is_part_of )
167+ else (_plugin .url or "" )
168+ )
113169
114170 if parser .plugins :
115171 print ()
0 commit comments