Skip to content

Commit 279e672

Browse files
author
notactuallyfinn
committed
added another process test and fixed small bug
1 parent a0c0005 commit 279e672

3 files changed

Lines changed: 206 additions & 25 deletions

File tree

src/hermes/commands/process/standard_merge.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import requests
1212

1313
from hermes.commands.base import HermesCommand
14-
from hermes.model.merge.action import Concat, MergeAction, MergeSet
14+
from hermes.model.merge.action import Concat, IdMerge, MergeAction, MergeSet
1515
from hermes.model.types import ld_dict
1616
from hermes.model.types.ld_context import iri_map as iri
1717
from .base import HermesProcessPlugin
@@ -242,7 +242,7 @@ def match_func(left: Any, right: Any) -> bool:
242242

243243
# Filled with entries for every schema-type that can be found inside a JSON-LD dict of type
244244
# SoftwareSourceCode or SoftwareApplication using schema and CodeMeta as Context.
245-
CODEMETA_STRATEGY = {None: {None: ACTIONS["default"]}}
245+
CODEMETA_STRATEGY = {None: {None: ACTIONS["default"], "@id": IdMerge()}}
246246
""" dict[str | None, dict[str | None, MergeAction]]: MergeActions for the standard JSON_LD contexts objects. """
247247
CODEMETA_STRATEGY[iri["schema:Thing"]] = {iri["schema:owner"]: ACTIONS["OrganizationOrPerson"]}
248248

@@ -865,7 +865,7 @@ def __call__(self, command: HermesCommand) -> dict[Union[str, None], dict[Union[
865865
subtypes_for_types = CodemetaProcessPlugin.get_schema_type_hierarchy()
866866
strats = CodemetaProcessPlugin.get_schema_strategies(subtypes_for_types)
867867
strats.update(CodemetaProcessPlugin.get_codemeta_strategies(subtypes_for_types))
868-
strats[None] = {None: MergeSet(DEFAULT_MATCH)}
868+
strats[None] = {None: MergeSet(DEFAULT_MATCH), "@id": IdMerge()}
869869
except Exception:
870870
strats = {**CODEMETA_STRATEGY}
871871
for key, value in PROV_STRATEGY.items():
@@ -942,14 +942,14 @@ def get_codemeta_strategies(cls, subtypes_for_types):
942942
special_types = set(MATCH_FUNCTION_FOR_TYPE.keys())
943943

944944
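# FIXME: change URL on change of context to codemeta 3.0
# NOTE(review): raw.githubusercontent.com paths are normally <owner>/<repo>/<ref>/<path> without "blob/" - verify URL.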
945-
download = requests.get("https://github.com/codemeta/codemeta/blob/2.0/crosswalk.csv")
945+
download = requests.get("https://raw.githubusercontent.com/codemeta/codemeta/blob/2.0/crosswalk.csv")
946946
decoded_content = download.content.decode('utf-8')
947947
cr = csv.reader(decoded_content.splitlines(), delimiter=',')
948948
# remove the first line (headers)
949949
property_table = list(cr)[1:]
950950
strategies = {}
951951
for property_row in property_table:
952-
if property_row[0] == "schema" or len(property_row[0]) == 0:
952+
if property_row[0] in ("schema", ""):
953953
# skip empty rows and rows whose first column is "schema"
954954
continue
955955
# generate a set of all types this property can have values of

src/hermes/model/merge/action.py

Lines changed: 51 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def merge(
2828
self: Self,
2929
target: ld_merge_dict,
3030
key: list[Union[str, int]],
31-
value: ld_merge_list,
31+
value: Union[ld_merge_list, str],
3232
update: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]
3333
) -> Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]:
3434
"""
@@ -39,7 +39,7 @@ def merge(
3939
target (ld_merge_dict): The ld_merge_dict inside of which the items are merged.
4040
key (list[str | int]): The "path" of keys so that ``target[key[-1]]`` is ``value`` and for the outermost
4141
parent of ``target`` out_parent ``out_parent[key[0]]...[key[-1]]`` results in ``value``.
42-
value (ld_merge_list): The value inside ``target`` that is to be merged with ``update``.
42+
value (ld_merge_list | str): The value inside ``target`` that is to be merged with ``update``.
4343
update (BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The value that is to be merged into ``target``
4444
with ``value``.
4545
@@ -56,7 +56,7 @@ def merge(
5656
self: Self,
5757
target: ld_merge_dict,
5858
key: list[Union[str, int]],
59-
value: ld_merge_list,
59+
value: Union[ld_merge_list, str],
6060
update: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]
6161
) -> ld_merge_list:
6262
"""
@@ -67,16 +67,17 @@ def merge(
6767
target (ld_merge_dict): The ld_merge_dict inside of which the items are merged.
6868
key (list[str | int]): The "path" of keys so that ``target[key[-1]]`` is ``value`` and for the outermost
6969
parent of ``target`` out_parent ``out_parent[key[0]]...[key[-1]]`` results in ``value``.
70-
value (ld_merge_list): The value inside ``target`` that is to be merged with ``update``.
70+
value (ld_merge_list | str): The value inside ``target`` that is to be merged with ``update``.
7171
This value won't be changed.
7272
update (BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The value that is to be merged into ``target`` with
7373
``value``. This value will be rejected.
7474
7575
Returns:
76-
ld_merge_list: The merged value. This value will always be ``value``.
76+
ld_merge_list | str: The merged value. This value will always be ``value``.
7777
"""
78-
# Add the entry that data has been rejected.
79-
target.reject(key, update)
78+
if value != update:
79+
# Add the entry that data has been rejected.
80+
target.reject(key, update)
8081
# Return value unchanged.
8182
return value
8283

@@ -87,7 +88,7 @@ def merge(
8788
self: Self,
8889
target: ld_merge_dict,
8990
key: list[Union[str, int]],
90-
value: ld_merge_list,
91+
value: Union[ld_merge_list, str],
9192
update: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]
9293
) -> Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]:
9394
"""
@@ -98,16 +99,17 @@ def merge(
9899
target (ld_merge_dict): The ld_merge_dict inside of which the items are merged.
99100
key (list[str | int]): The "path" of keys so that ``target[key[-1]]`` is ``value`` and for the outermost
100101
parent of ``target`` out_parent ``out_parent[key[0]]...[key[-1]]`` results in ``value``.
101-
value (ld_merge_list): The value inside ``target`` that is to be merged with ``update``.
102+
value (ld_merge_list | str): The value inside ``target`` that is to be merged with ``update``.
102103
This value will be replaced.
103104
update (BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The value that is to be merged into ``target`` with
104105
``value``. This value will be used instead of ``value``.
105106
106107
Returns:
107108
BASIC_TYPE | TIME_TYPE | ld_dict | ld_list: The merged value. This value will be ``update``.
108109
"""
109-
# Add the entry that data has been replaced.
110-
target.replace(key, value)
110+
if value != update:
111+
# Add the entry that data has been replaced.
112+
target.replace(key, value)
111113
# Return the new value.
112114
return update
113115

@@ -118,7 +120,7 @@ def merge(
118120
self: Self,
119121
target: ld_merge_dict,
120122
key: list[Union[str, int]],
121-
value: ld_merge_list,
123+
value: Union[ld_merge_list, str],
122124
update: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]
123125
) -> ld_merge_list:
124126
"""
@@ -128,12 +130,12 @@ def merge(
128130
target (ld_merge_dict): The ld_merge_dict inside of which the items are merged.
129131
key (list[str | int]): The "path" of keys so that ``target[key[-1]]`` is ``value`` and for the outermost
130132
parent of ``target`` out_parent ``out_parent[key[0]]...[key[-1]]`` results in ``value``.
131-
value (ld_merge_list): The value inside ``target`` that is to be merged with ``update``.
133+
value (ld_merge_list | str): The value inside ``target`` that is to be merged with ``update``.
132134
update (BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The value that is to be merged into ``target``
133135
with ``value``.
134136
135137
Returns:
136-
ld_merge_list: The merged value (``value`` concatenated with ``update``).
138+
ld_merge_list | str: The merged value (``value`` concatenated with ``update``).
137139
"""
138140
# Concatenate the items and return the result.
139141
if isinstance(update, (list, ld_list)):
@@ -173,7 +175,7 @@ def merge(
173175
self: Self,
174176
target: ld_merge_dict,
175177
key: list[Union[str, int]],
176-
value: ld_merge_list,
178+
value: Union[ld_merge_list, str],
177179
update: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]
178180
) -> ld_merge_list:
179181
"""
@@ -183,12 +185,12 @@ def merge(
183185
target (ld_merge_dict): The ld_merge_dict inside of which the items are merged.
184186
key (list[str | int]): The "path" of keys so that ``target[key[-1]]`` is ``value`` and for the outermost
185187
parent of ``target`` out_parent ``out_parent[key[0]]...[key[-1]]`` results in ``value``.
186-
value (ld_merge_list): The value inside ``target`` that is to be merged with ``update``.
188+
value (ld_merge_list | str): The value inside ``target`` that is to be merged with ``update``.
187189
update (BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The value that is to be merged into ``target``
188190
with ``value``.
189191
190192
Returns:
191-
ld_merge_list: The merged value.
193+
ld_merge_list | str: The merged value.
192194
"""
193195
if not isinstance(update, (list, ld_list)):
194196
update = [update]
@@ -235,7 +237,7 @@ def merge(
235237
self: Self,
236238
target: ld_merge_dict,
237239
key: list[Union[str, int]],
238-
value: ld_merge_list,
240+
value: Union[ld_merge_list, str],
239241
update: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]
240242
) -> ld_merge_list:
241243
"""
@@ -245,12 +247,12 @@ def merge(
245247
target (ld_merge_dict): The ld_merge_dict inside of which the items are merged.
246248
key (list[str | int]): The "path" of keys so that ``target[key[-1]]`` is ``value`` and for the outermost
247249
parent of ``target`` out_parent out_parent[key[0]]...[key[-1]] results in ``value``.
248-
value (ld_merge_list): The value inside ``target`` that is to be merged with ``update``.
250+
value (ld_merge_list | str): The value inside ``target`` that is to be merged with ``update``.
249251
update (BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The value that is to be merged into ``target``
250252
with ``value``.
251253
252254
Returns:
253-
ld_merge_list: The merged value.
255+
ld_merge_list | str: The merged value.
254256
"""
255257
if not isinstance(update, (list, ld_list)):
256258
update = [update]
@@ -272,3 +274,32 @@ def merge(
272274
value.append(update_item)
273275
# Return the merged values.
274276
return value
277+
278+
class IdMerge(MergeAction):
    """ :class:`MergeAction` for ``@id`` values: equal ids are kept unchanged, differing ids raise an error. """

    def merge(
        self: Self,
        target: ld_merge_dict,
        key: list[Union[str, int]],
        value: Union[ld_merge_list, str],
        update: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]
    ) -> Union[ld_merge_list, str]:
        """
        Check that ``value`` and ``update`` are the same ``@id`` and return ``value`` unchanged.

        Nothing is written to ``target``; this action only validates.

        Args:
            target (ld_merge_dict): The ld_merge_dict inside of which the items are merged.
            key (list[str | int]): The "path" of keys so that ``target[key[-1]]`` is ``value`` and for the outermost
                parent of ``target`` out_parent ``out_parent[key[0]]...[key[-1]]`` results in ``value``.
                The last entry has to be ``"@id"``.
            value (ld_merge_list | str): The value inside ``target`` that is to be merged with ``update``.
            update (BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The value that is to be merged into ``target``
                with ``value``.

        Returns:
            ld_merge_list | str: The merged value. This value will always be ``value``.

        Raises:
            MergeError: If the last entry of ``key`` is not ``"@id"`` (or ``key`` is empty), or if ``value`` and
                ``update`` differ.
        """
        # An empty path cannot address an "@id" entry; report it as a merge error instead of an IndexError.
        if not key or key[-1] != "@id":
            raise MergeError("Can't merge non-'@id' values.")
        # Two different ids denote two different objects, so they must never be merged into one.
        if value != update:
            raise MergeError("Two different '@id' values are merged into the same object.")
        return value

test/hermes_test/commands/process/test_process.py

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,156 @@
4848
"http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}],
4949
}
5050
),
51+
),
52+
(
53+
{
54+
"cff": SoftwareMetadata(
55+
{
56+
"type": "SoftwareSourceCode",
57+
"author": [
58+
{
59+
"id": "https://orcid.org/0000-0003-4925-7248",
60+
"type": "Person",
61+
"affiliation": {
62+
"type": "Organization",
63+
"name": "German Aerospace Center (DLR)"
64+
},
65+
"email": "stephan.druskat@dlr.de"
66+
},
67+
{
68+
"type": "Person",
69+
"affiliation": {
70+
"type": "Organization",
71+
"name": "Forschungszentrum J\u00c3\u00bclich"
72+
},
73+
"email": "o.bertuch@fz-juelich.de",
74+
"givenName": "Oliver"
75+
},
76+
{
77+
"id": "https://orcid.org/0000-0001-8174-7795",
78+
"type": "Person",
79+
"email": "o.knodel@hzdr.de",
80+
"familyName": "Knodel",
81+
"givenName": "Oliver"
82+
}
83+
],
84+
"description": "Tool to automate software publication. Not stable yet.",
85+
"identifier": "https://doi.org/10.5281/zenodo.13221384",
86+
"license": "https://spdx.org/licenses/Apache-2.0"
87+
}
88+
),
89+
"codemeta": SoftwareMetadata(
90+
{
91+
"type": "SoftwareSourceCode",
92+
"author": [
93+
{
94+
"id": "https://orcid.org/0000-0001-6372-3853",
95+
"type": "Person",
96+
"affiliation": {
97+
"type": "Organization",
98+
"legalName": "German Aerospace Center (DLR)"
99+
},
100+
"email": "michael.meinel@dlr.de",
101+
"familyName": "Meinel",
102+
"givenName": "Michael"
103+
},
104+
{
105+
"id": "https://orcid.org/0000-0003-4925-7248",
106+
"type": "Person",
107+
"affiliation": {
108+
"type": "Organization",
109+
"legalName": "German Aerospace Center (DLR)"
110+
},
111+
"email": "stephan.druskat@dlr.de",
112+
"familyName": "Druskat",
113+
"givenName": "Stephan"
114+
},
115+
{
116+
"id": "https://orcid.org/0000-0002-2702-3419",
117+
"type": "Person",
118+
"affiliation": {
119+
"type": "Organization",
120+
"legalName": "Forschungszentrum J\u00c3\u00bclich"
121+
},
122+
"email": "o.bertuch@fz-juelich.de",
123+
"familyName": "Bertuch"
124+
},
125+
{
126+
"id": "https://orcid.org/0000-0001-8174-7795",
127+
"type": "Person",
128+
"affiliation": {
129+
"type": "Organization",
130+
"legalName": "Helmholtz-Zentrum Dresden-Rossendorf (HZDR)"
131+
},
132+
"familyName": "Knodel",
133+
"givenName": "Oliver"
134+
}
135+
],
136+
"identifier": "https://doi.org/10.5281/zenodo.13221384",
137+
"license": "https://spdx.org/licenses/Apache-2.0",
138+
"legalName": "hermes",
139+
"version": "0.9.0"
140+
},
141+
extra_vocabs = {"legalName": {"@id": "http://schema.org/name"}}
142+
)
143+
},
144+
SoftwareMetadata(
145+
{
146+
"type": "SoftwareSourceCode",
147+
"schema:author": [
148+
{
149+
"id": "https://orcid.org/0000-0001-6372-3853",
150+
"type": "Person",
151+
"affiliation": {
152+
"type": "Organization",
153+
"legalName": "German Aerospace Center (DLR)"
154+
},
155+
"email": "michael.meinel@dlr.de",
156+
"familyName": "Meinel",
157+
"givenName": "Michael"
158+
},
159+
{
160+
"id": "https://orcid.org/0000-0003-4925-7248",
161+
"type": "Person",
162+
"affiliation": {
163+
"type": "Organization",
164+
"legalName": "German Aerospace Center (DLR)"
165+
},
166+
"email": "stephan.druskat@dlr.de",
167+
"familyName": "Druskat",
168+
"givenName": "Stephan"
169+
},
170+
{
171+
"id": "https://orcid.org/0000-0002-2702-3419",
172+
"type": "Person",
173+
"affiliation": {
174+
"type": "Organization",
175+
"legalName": "Forschungszentrum J\u00c3\u00bclich"
176+
},
177+
"email": "o.bertuch@fz-juelich.de",
178+
"familyName": "Bertuch",
179+
"givenName": "Oliver"
180+
},
181+
{
182+
"id": "https://orcid.org/0000-0001-8174-7795",
183+
"type": "Person",
184+
"affiliation": {
185+
"type": "Organization",
186+
"legalName": "Helmholtz-Zentrum Dresden-Rossendorf (HZDR)"
187+
},
188+
"email": "o.knodel@hzdr.de",
189+
"familyName": "Knodel",
190+
"givenName": "Oliver"
191+
}
192+
],
193+
"description": "Tool to automate software publication. Not stable yet.",
194+
"identifier": "https://doi.org/10.5281/zenodo.13221384",
195+
"license": "https://spdx.org/licenses/Apache-2.0",
196+
"legalName": "hermes",
197+
"version": "0.9.0"
198+
},
199+
extra_vocabs = {"legalName": {"@id": "http://schema.org/name"}}
200+
),
51201
)
52202
],
53203
)

0 commit comments

Comments
 (0)