Skip to content

Commit b7543ee

Browse files
author
Michael Fritzsche
committed
reworked merging and added strategies
1 parent 4080091 commit b7543ee

6 files changed

Lines changed: 731 additions & 116 deletions

File tree

src/hermes/model/merge/action.py

Lines changed: 32 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
from __future__ import annotations
99

10-
from typing import TYPE_CHECKING, Callable, Union
10+
from typing import TYPE_CHECKING, Any, Callable, Union
1111
from typing_extensions import Self
1212

1313
from ..types import ld_dict, ld_list
@@ -76,9 +76,8 @@ def merge(
7676
This value will always be value.
7777
:rtype: ld_merge_list
7878
"""
79-
# If necessary, add the entry that data has been rejected.
80-
if value != update:
81-
target.reject(key, update)
79+
# Add the entry that data has been rejected.
80+
target.reject(key, update)
8281
# Return value unchanged.
8382
return value
8483

@@ -111,8 +110,7 @@ def merge(
111110
:rtype: BASIC_TYPE | TIME_TYPE | ld_dict | ld_list
112111
"""
113112
# Add the entry that data has been replaced.
114-
if value != update:
115-
target.replace(key, value)
113+
target.replace(key, value)
116114
# Return the new value.
117115
return update
118116

@@ -151,32 +149,21 @@ def merge(
151149

152150

153151
class Collect(MergeAction):
154-
def __init__(
155-
self: Self,
156-
match: Union[
157-
Callable[
158-
[
159-
Union[BASIC_TYPE, TIME_TYPE, ld_merge_dict, ld_merge_list],
160-
Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]
161-
],
162-
bool
163-
],
164-
Callable[[ld_merge_dict, ld_dict], bool]
165-
]
166-
) -> None:
152+
def __init__(self: Self, match: Callable[[Any, Any], bool], reject_incoming: bool = True) -> None:
167153
"""
168-
Set the match function for this collect merge action.
154+
Set the match function for this collect merge action and the behavior for matches.
169155
170156
:param match: The function used to evaluate equality while merging.
171-
:type match: Callable[
172-
[BASIC_TYPE | TIME_TYPE | ld_merge_dict | ld_merge_list, BASIC_TYPE | TIME_TYPE | ld_dict | ld_list],
173-
bool
174-
] | Callable[[ld_merge_dict, ld_dict], bool]
157+
:type match: Callable[[Any, Any], bool]
158+
:param reject_incoming: If an incoming item matches an already collected one and this flag is True,
159+
the incoming item is rejected; if it is False, the matched item is replaced by the incoming item.
160+
:type reject_incoming: bool
175161
176162
:return:
177163
:rtype: None
178164
"""
179165
self.match = match
166+
self.reject_incoming = reject_incoming
180167

181168
def merge(
182169
self: Self,
@@ -206,44 +193,31 @@ def merge(
206193

207194
# iterate over all new items
208195
for update_item in update:
209-
# If the current new item has no occurence in value (according to self.match) add it to value.
210-
if not any(self.match(item, update_item) for item in value):
196+
# Iterate over all items in value and if a match is found replace the first one or reject update_item.
197+
for index, item in enumerate(value):
198+
if self.match(item, update_item):
199+
if not self.reject_incoming:
200+
value[index] = update_item
201+
break
202+
else:
203+
# If the current new item has no occurrence in value (according to self.match) add it to value.
211204
value.append(update_item)
212205

213206
return value
214207

215208

216209
class MergeSet(MergeAction):
217-
def __init__(
218-
self: Self,
219-
match: Union[
220-
Callable[
221-
[
222-
Union[BASIC_TYPE, TIME_TYPE, ld_merge_dict, ld_merge_list],
223-
Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]
224-
],
225-
bool
226-
],
227-
Callable[[ld_merge_dict, ld_dict], bool]
228-
],
229-
merge_items: bool = True
230-
) -> None:
210+
def __init__(self: Self, match: Callable[[Any, Any], bool]) -> None:
231211
"""
232212
Set the match function for this merge set merge action.
233213
234214
:param match: The function used to evaluate equality while merging.
235-
:type match: Callable[
236-
[BASIC_TYPE | TIME_TYPE | ld_merge_dict | ld_merge_list, BASIC_TYPE | TIME_TYPE | ld_dict | ld_list],
237-
bool
238-
] | Callable[[ld_merge_dict, ld_dict], bool]
239-
:param merge_items: Whether or to to merge similar items. (If false this is basically :class:`Concat`)
240-
:type merge_items: bool
215+
:type match: Callable[[Any, Any], bool]
241216
242217
:return:
243218
:rtype: None
244219
"""
245220
self.match = match
246-
self.merge_items = merge_items
247221

248222
def merge(
249223
self: Self,
@@ -271,13 +245,19 @@ def merge(
271245
if not isinstance(update, (list, ld_list)):
272246
update = [update]
273247

274-
for item in update:
248+
for update_item in update:
275249
# For each new item merge it into a similar item (according to match) inside target[key[-1]]
276-
# (aka inside value) if such an item exists and merging is permitted.
250+
# (aka inside value) if such an item exists.
277251
# Otherwise append it to target[key[-1]] (aka to value).
278-
target_item = target.match(key[-1], item, self.match)
279-
if target_item and self.merge_items:
280-
target_item.update(item)
252+
for index, item in enumerate(value):
253+
if self.match(item, update_item):
254+
if isinstance(item, ld_dict) and isinstance(update_item, ld_dict):
255+
item.update(update_item)
256+
elif isinstance(item, ld_list) and isinstance(update_item, ld_list):
257+
self.merge(target, [*key, index], item, update_item)
258+
elif isinstance(item, (ld_dict, ld_list)) or isinstance(update_item, (ld_dict, ld_list)):
259+
""" FIXME: log error """
260+
break
281261
else:
282262
value.append(item)
283263
# Return the merged values.

src/hermes/model/merge/container.py

Lines changed: 8 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,15 @@
77

88
from __future__ import annotations
99

10-
from typing import Callable, Union, TYPE_CHECKING
10+
from typing import TYPE_CHECKING, Any, Callable, Union
1111
from typing_extensions import Self
1212

1313
from ..types import ld_container, ld_context, ld_dict, ld_list
1414
from ..types.ld_container import (
1515
BASIC_TYPE, EXPANDED_JSON_LD_VALUE, JSON_LD_CONTEXT_DICT, JSON_LD_VALUE, TIME_TYPE
1616
)
1717
from ..types.pyld_util import bundled_loader
18-
from .strategy import CODEMETA_STRATEGY, PROV_STRATEGY, REPLACE_STRATEGY
18+
from .strategy import CODEMETA_STRATEGY, PROV_STRATEGY
1919

2020
if TYPE_CHECKING:
2121
from .action import MergeAction
@@ -153,8 +153,7 @@ def __init__(
153153
self.update_context(ld_context.HERMES_PROV_CONTEXT)
154154

155155
# add strategies
156-
self.strategies = {**REPLACE_STRATEGY}
157-
self.add_strategy(CODEMETA_STRATEGY)
156+
self.strategies = {**CODEMETA_STRATEGY}
158157
self.add_strategy(PROV_STRATEGY)
159158

160159
def update_context(
@@ -238,16 +237,7 @@ def match(
238237
self: Self,
239238
key: str,
240239
value: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list],
241-
match: Union[
242-
Callable[
243-
[
244-
Union[BASIC_TYPE, TIME_TYPE, "ld_merge_dict", ld_merge_list],
245-
Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]
246-
],
247-
bool
248-
],
249-
Callable[["ld_merge_dict", ld_dict], bool]
250-
]
240+
match: Callable[[Any, Any], bool]
251241
) -> Union[BASIC_TYPE, TIME_TYPE, "ld_merge_dict", ld_merge_list]:
252242
"""
253243
Returns the first item in self[key] for which match(item, value) returns true.
@@ -260,10 +250,7 @@ def match(
260250
:param value: The value a match is searched for in self[key].
261251
:type value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]
262252
:param match: The method defining if two objects are a match.
263-
:type match: Callable[
264-
[BASIC_TYPE | TIME_TYPE | ld_merge_dict | ld_merge_list, BASIC_TYPE | TIME_TYPE | ld_dict | ld_list],
265-
bool
266-
] | Callable[[ld_merge_dict, ld_dict], bool]
253+
:type match: Callable[[Any, Any], bool]
267254
268255
:return: The item in self[key] that is a match to value if one exists else None
269256
:rtype: BASIC_TYPE | TIME_TYPE | ld_merge_dict | ld_merge_list
@@ -317,6 +304,7 @@ def _add_related(
317304
:return:
318305
:rtype: None
319306
"""
307+
# FIXME: key not only string
320308
# make sure appending is possible
321309
self.emplace(rel)
322310
# append the new entry
@@ -338,6 +326,7 @@ def reject(self: Self, key: str, value: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld
338326
:return:
339327
:rtype: None
340328
"""
329+
# FIXME: key not only string
341330
self._add_related("hermes-rt:reject", key, value)
342331

343332
def replace(self: Self, key: str, value: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]) -> None:
@@ -356,4 +345,5 @@ def replace(self: Self, key: str, value: Union[BASIC_TYPE, TIME_TYPE, ld_dict, l
356345
:return:
357346
:rtype: None
358347
"""
348+
# FIXME: key not only string
359349
self._add_related("hermes-rt:replace", key, value)

src/hermes/model/merge/match.py

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,10 @@
55
# SPDX-FileContributor: Michael Meinel
66
# SPDX-FileContributor: Michael Fritzsche
77

8-
from __future__ import annotations
9-
10-
from typing import Any, Callable, TYPE_CHECKING
8+
from typing import Any, Callable
119

1210
from ..types import ld_dict
1311

14-
if TYPE_CHECKING:
15-
from .container import ld_merge_dict
16-
1712

1813
def match_equals(a: Any, b: Any) -> bool:
1914
"""
@@ -30,26 +25,29 @@ def match_equals(a: Any, b: Any) -> bool:
3025
return a == b
3126

3227

33-
def match_keys(
34-
*keys: list[str]
35-
) -> Callable[[ld_merge_dict, ld_dict], bool]:
28+
def match_keys(*keys: list[str], fall_back_to_equals: bool = False) -> Callable[[Any, Any], bool]:
3629
"""
3730
Creates a function taking two parameters that returns true
3831
if both given parameters have at least one common key in the given list of keys
39-
and for all common keys in the given list of keys the values of both objects are the same.
32+
and for all common keys in the given list of keys the values of both objects are the same.<br>
33+
If fall_back_to_equals is True, the returned function returns the value of normal == comparison
34+
if no key from keys is in both objects.
4035
4136
:param keys: The list of important keys for the comparison method.
4237
:type keys: list[str]
38+
:param fall_back_to_equals: Whether or not a fall back option should be used.
39+
:type fall_back_to_equals: bool
4340
4441
:return: A function comparing two given objects values for the keys in keys.
4542
:rtype: Callable[[Any, Any], bool]
4643
"""
4744

4845
# create and return the match function using the given keys
49-
def match_func(left: ld_merge_dict, right: ld_dict) -> bool:
46+
def match_func(left: Any, right: Any) -> bool:
5047
"""
5148
Compares left to right by checking if a) they have at least one common key in a predetermined list of keys and
52-
b) testing if both objects have equal values for all common keys in the predetermined key list.
49+
b) testing if both objects have equal values for all common keys in the predetermined key list.<br>
50+
It may fall back on == if no common key in the predetermined list of keys exists.
5351
5452
:param left: The first object for the comparison.
5553
:type left: ld_merge_dict
@@ -59,12 +57,27 @@ def match_func(left: ld_merge_dict, right: ld_dict) -> bool:
5957
:return: The result of the comparison.
6058
:rtype: bool
6159
"""
62-
# TODO: This method maybe should try == comparison instead of returning false if active_keys == [].
60+
if not (isinstance(left, ld_dict) and isinstance(right, ld_dict)):
61+
return fall_back_to_equals and (left == right)
6362
# create a list of all common important keys
6463
active_keys = [key for key in keys if key in left and key in right]
64+
# fall back to == if no active keys
65+
if fall_back_to_equals and not active_keys:
66+
return left == right
6567
# check if both objects have the same values for all active keys
6668
pairs = [(left[key] == right[key]) for key in active_keys]
6769
# return whether or not both objects had the same values for all active keys
6870
# and there was at least one active key
6971
return len(active_keys) > 0 and all(pairs)
7072
return match_func
73+
74+
75+
def match_person(left: Any, right: Any) -> bool:
76+
if not (isinstance(left, ld_dict) and isinstance(right, ld_dict)):
77+
return left == right
78+
if "@id" in left and "@id" in right:
79+
return left["@id"] == right["@id"]
80+
if "schema:email" in left and "schema:email" in right:
81+
mails_right = right["schema:email"]
82+
return any((mail in mails_right) for mail in left["schema:email"])
83+
return left == right

0 commit comments

Comments
 (0)