forked from MemPalace/mempalace
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_entity_registry.py
More file actions
373 lines (282 loc) · 13 KB
/
test_entity_registry.py
File metadata and controls
373 lines (282 loc) · 13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
"""Tests for mempalace.entity_registry."""
from unittest.mock import patch
from mempalace.entity_registry import (
COMMON_ENGLISH_WORDS,
PERSON_CONTEXT_PATTERNS,
EntityRegistry,
)
# Canned return value for the patched ``_wikipedia_lookup`` in the research
# tests below: it describes "Saoirse" as a person with 0.80 confidence.
# Tests pass ``dict(_MOCK_SAOIRSE_PERSON)`` (a shallow copy) to ``patch`` so
# the shared template itself is never handed to the code under test.
_MOCK_SAOIRSE_PERSON = {
    "inferred_type": "person",
    "confidence": 0.80,
    "wiki_summary": "Saoirse is an Irish given name.",
    "wiki_title": "Saoirse",
}
# ── COMMON_ENGLISH_WORDS ────────────────────────────────────────────────
def test_common_english_words_has_expected_entries():
    """Spot-check that a few specific words are present in COMMON_ENGLISH_WORDS."""
    expected_words = ("ever", "grace", "will", "may", "monday")
    for expected in expected_words:
        assert expected in COMMON_ENGLISH_WORDS
def test_common_english_words_is_lowercase():
    """Every entry in COMMON_ENGLISH_WORDS must equal its own lowercased form."""
    for entry in COMMON_ENGLISH_WORDS:
        lowered = entry.lower()
        assert entry == lowered, f"{entry} should be lowercase"
# ── PERSON_CONTEXT_PATTERNS ─────────────────────────────────────────────
def test_person_context_patterns_is_nonempty():
    """PERSON_CONTEXT_PATTERNS must contain at least one entry."""
    pattern_count = len(PERSON_CONTEXT_PATTERNS)
    assert pattern_count > 0
# ── EntityRegistry creation and empty state ─────────────────────────────
def test_load_from_nonexistent_dir(tmp_path):
    """Loading from a directory with no saved registry yields the empty defaults."""
    fresh = EntityRegistry.load(config_dir=tmp_path)

    # Nothing has been seeded or saved, so every collection is empty and the
    # mode is the "personal" default.
    assert fresh.people == {}
    assert fresh.projects == []
    assert fresh.mode == "personal"
    assert fresh.ambiguous_flags == []
def test_save_and_load_roundtrip(tmp_path):
    """Data seeded into one registry is visible to a second load from the same dir."""
    alice = {"name": "Alice", "relationship": "colleague", "context": "work"}
    writer = EntityRegistry.load(config_dir=tmp_path)
    writer.seed(mode="work", people=[alice], projects=["MemPalace"])

    # A second, independent load from the same directory must see the seed data.
    reader = EntityRegistry.load(config_dir=tmp_path)
    assert reader.mode == "work"
    assert "Alice" in reader.people
    assert "MemPalace" in reader.projects
def test_save_creates_file(tmp_path):
    """save() leaves an entity_registry.json file in the config directory."""
    expected_file = tmp_path / "entity_registry.json"
    EntityRegistry.load(config_dir=tmp_path).save()
    assert expected_file.exists()
# ── seed ────────────────────────────────────────────────────────────────
def test_seed_registers_people(tmp_path):
    """seed() stores each person and stamps onboarding source with confidence 1.0."""
    roster = [
        {"name": "Riley", "relationship": "daughter", "context": "personal"},
        {"name": "Devon", "relationship": "friend", "context": "personal"},
    ]
    registry = EntityRegistry.load(config_dir=tmp_path)
    registry.seed(mode="personal", people=roster, projects=["MemPalace"])

    people = registry.people
    assert "Riley" in people
    assert "Devon" in people

    # Besides the supplied relationship, seeded entries carry provenance fields.
    riley = people["Riley"]
    assert riley["relationship"] == "daughter"
    assert riley["source"] == "onboarding"
    assert riley["confidence"] == 1.0
def test_seed_registers_projects(tmp_path):
    """seed() stores the project names in the order they were given."""
    project_names = ["Acme", "Widget"]
    registry = EntityRegistry.load(config_dir=tmp_path)
    registry.seed(mode="work", people=[], projects=project_names)
    assert registry.projects == ["Acme", "Widget"]
def test_seed_sets_mode(tmp_path):
    """seed() records the requested mode on the registry."""
    requested_mode = "combo"
    registry = EntityRegistry.load(config_dir=tmp_path)
    registry.seed(mode=requested_mode, people=[], projects=[])
    assert registry.mode == "combo"
def test_seed_flags_ambiguous_names(tmp_path):
    """Seeding "Grace" flags "grace" as ambiguous; seeding "Riley" flags nothing."""
    grace = {"name": "Grace", "relationship": "friend", "context": "personal"}
    riley = {"name": "Riley", "relationship": "daughter", "context": "personal"}
    registry = EntityRegistry.load(config_dir=tmp_path)
    registry.seed(mode="personal", people=[grace, riley], projects=[])

    flagged = registry.ambiguous_flags
    assert "grace" in flagged
    # Riley is not a common English word, so it must stay unflagged.
    assert "riley" not in flagged
def test_seed_with_aliases(tmp_path):
    """An alias passed to seed() gets its own people entry pointing at the canonical name."""
    maxwell = {"name": "Maxwell", "relationship": "friend", "context": "personal"}
    alias_map = {"Max": "Maxwell"}
    registry = EntityRegistry.load(config_dir=tmp_path)
    registry.seed(mode="personal", people=[maxwell], projects=[], aliases=alias_map)

    people = registry.people
    assert "Maxwell" in people
    assert "Max" in people
    # The alias entry records which seeded person it stands for.
    assert people["Max"].get("canonical") == "Maxwell"
def test_seed_skips_empty_names(tmp_path):
    """A person record with an empty name is not added to the registry."""
    nameless = {"name": "", "relationship": "", "context": "personal"}
    registry = EntityRegistry.load(config_dir=tmp_path)
    registry.seed(mode="personal", people=[nameless], projects=[])

    person_count = len(registry.people)
    assert person_count == 0
# ── lookup ──────────────────────────────────────────────────────────────
def test_lookup_known_person(tmp_path):
    """lookup() of a seeded person reports type "person" with full confidence."""
    riley = {"name": "Riley", "relationship": "daughter", "context": "personal"}
    registry = EntityRegistry.load(config_dir=tmp_path)
    registry.seed(mode="personal", people=[riley], projects=[])

    hit = registry.lookup("Riley")
    assert hit["type"] == "person"
    assert hit["confidence"] == 1.0
    assert hit["name"] == "Riley"
def test_lookup_known_project(tmp_path):
    """lookup() of a seeded project reports type "project" with full confidence."""
    registry = EntityRegistry.load(config_dir=tmp_path)
    registry.seed(mode="work", people=[], projects=["MemPalace"])

    hit = registry.lookup("MemPalace")
    assert hit["type"] == "project"
    assert hit["confidence"] == 1.0
def test_lookup_unknown_word(tmp_path):
    """lookup() of a word that was never seeded reports "unknown" with zero confidence."""
    registry = EntityRegistry.load(config_dir=tmp_path)
    registry.seed(mode="personal", people=[], projects=[])

    miss = registry.lookup("Xyzzy")
    assert miss["type"] == "unknown"
    assert miss["confidence"] == 0.0
def test_lookup_case_insensitive(tmp_path):
    """A lowercase query still resolves to the person seeded as "Riley"."""
    riley = {"name": "Riley", "relationship": "daughter", "context": "personal"}
    registry = EntityRegistry.load(config_dir=tmp_path)
    registry.seed(mode="personal", people=[riley], projects=[])

    hit = registry.lookup("riley")
    assert hit["type"] == "person"
def test_lookup_alias(tmp_path):
    """lookup() of a registered alias reports type "person"."""
    maxwell = {"name": "Maxwell", "relationship": "friend", "context": "personal"}
    registry = EntityRegistry.load(config_dir=tmp_path)
    registry.seed(
        mode="personal",
        people=[maxwell],
        projects=[],
        aliases={"Max": "Maxwell"},
    )

    hit = registry.lookup("Max")
    assert hit["type"] == "person"
# ── disambiguation ──────────────────────────────────────────────────────
def test_lookup_ambiguous_word_as_person(tmp_path):
    """With a person-like sentence as context, "Grace" resolves to a person."""
    grace = {"name": "Grace", "relationship": "friend", "context": "personal"}
    registry = EntityRegistry.load(config_dir=tmp_path)
    registry.seed(mode="personal", people=[grace], projects=[])

    sentence = "I went with Grace today"
    verdict = registry.lookup("Grace", context=sentence)
    assert verdict["type"] == "person"
def test_lookup_ambiguous_word_as_concept(tmp_path):
    """With an ordinary-word usage as context, "Ever" resolves to a concept."""
    ever = {"name": "Ever", "relationship": "friend", "context": "personal"}
    registry = EntityRegistry.load(config_dir=tmp_path)
    registry.seed(mode="personal", people=[ever], projects=[])

    sentence = "have you ever tried this"
    verdict = registry.lookup("Ever", context=sentence)
    assert verdict["type"] == "concept"
# ── research — local-only by default ───────────────────────────────────
def test_research_local_only_by_default(tmp_path):
    """research() must NOT call Wikipedia unless allow_network=True."""
    registry = EntityRegistry.load(config_dir=tmp_path)
    registry.seed(mode="personal", people=[], projects=[])

    # Any call to the lookup helper raises, so reaching the asserts below
    # shows that no lookup was attempted.
    forbid_lookup = patch(
        "mempalace.entity_registry._wikipedia_lookup",
        side_effect=AssertionError("network call should not happen"),
    )
    with forbid_lookup:
        outcome = registry.research("Saoirse")

    assert outcome["inferred_type"] == "unknown"
    assert outcome["confidence"] == 0.0
    assert outcome["word"] == "Saoirse"
    assert "network lookup disabled" in outcome.get("note", "")
def test_research_with_allow_network(tmp_path):
    """research(allow_network=True) consults the Wikipedia lookup and returns its inference.

    The lookup helper is patched to return a copy of the canned "person"
    result. The test asserts both that the helper was invoked and that the
    inferred type is passed through to the caller. Caching is not asserted
    here.
    """
    registry = EntityRegistry.load(config_dir=tmp_path)
    registry.seed(mode="personal", people=[], projects=[])
    with patch(
        "mempalace.entity_registry._wikipedia_lookup",
        return_value=dict(_MOCK_SAOIRSE_PERSON),
    ) as mock_lookup:
        result = registry.research("Saoirse", auto_confirm=True, allow_network=True)

    # Without this check the test would still pass if research() produced a
    # "person" result without ever consulting the (patched) lookup.
    assert mock_lookup.called
    assert result["inferred_type"] == "person"
def test_research_caches_result(tmp_path):
    """Once cached via allow_network, subsequent calls use cache without network."""
    lookup_target = "mempalace.entity_registry._wikipedia_lookup"
    registry = EntityRegistry.load(config_dir=tmp_path)
    registry.seed(mode="personal", people=[], projects=[])

    # First call: network allowed, lookup patched to return the canned person.
    with patch(lookup_target, return_value=dict(_MOCK_SAOIRSE_PERSON)):
        first = registry.research("Saoirse", auto_confirm=True, allow_network=True)
    assert first["inferred_type"] == "person"

    # Second call should use cache, not call Wikipedia again: the lookup is
    # patched to raise if it is touched.
    with patch(lookup_target, side_effect=AssertionError("should not be called")):
        second = registry.research("Saoirse")
    assert second["inferred_type"] == "person"
def test_research_local_only_not_cached(tmp_path):
    """Local-only result for uncached word should NOT be persisted to cache."""
    registry = EntityRegistry.load(config_dir=tmp_path)
    registry.seed(mode="personal", people=[], projects=[])

    # Default arguments: local-only, no network.
    registry.research("Xander")

    wiki_cache = registry._data.get("wiki_cache", {})
    assert "Xander" not in wiki_cache
def test_confirm_research_adds_to_people(tmp_path):
    """confirm_research() adds a researched word to people with source "wiki"."""
    registry = EntityRegistry.load(config_dir=tmp_path)
    registry.seed(mode="personal", people=[], projects=[])

    # Research first (unconfirmed) against the patched lookup.
    canned_lookup = patch(
        "mempalace.entity_registry._wikipedia_lookup",
        return_value=dict(_MOCK_SAOIRSE_PERSON),
    )
    with canned_lookup:
        registry.research("Saoirse", auto_confirm=False, allow_network=True)

    registry.confirm_research("Saoirse", entity_type="person", relationship="friend")

    people = registry.people
    assert "Saoirse" in people
    assert people["Saoirse"]["source"] == "wiki"
def test_wikipedia_404_returns_unknown(tmp_path):
    """A 404 from Wikipedia should return 'unknown', not assert 'person'."""
    registry = EntityRegistry.load(config_dir=tmp_path)
    registry.seed(mode="personal", people=[], projects=[])

    # Canned "not found" payload returned by the patched lookup helper.
    not_found = {
        "inferred_type": "unknown",
        "confidence": 0.3,
        "wiki_summary": None,
        "wiki_title": None,
        "note": "not found in Wikipedia",
    }
    lookup_patch = patch(
        "mempalace.entity_registry._wikipedia_lookup", return_value=not_found
    )
    with lookup_patch:
        outcome = registry.research("Zzxqy", auto_confirm=False, allow_network=True)

    assert outcome["inferred_type"] == "unknown"
    assert outcome["confidence"] < 0.5
# ── extract_people_from_query ───────────────────────────────────────────
def test_extract_people_from_query(tmp_path):
    """Only the seeded people actually named in the query are returned."""
    riley = {"name": "Riley", "relationship": "daughter", "context": "personal"}
    devon = {"name": "Devon", "relationship": "friend", "context": "personal"}
    registry = EntityRegistry.load(config_dir=tmp_path)
    registry.seed(mode="personal", people=[riley, devon], projects=[])

    question = "What did Riley say about the weather?"
    mentioned = registry.extract_people_from_query(question)

    assert "Riley" in mentioned
    # Devon is seeded but absent from the query text.
    assert "Devon" not in mentioned
# ── extract_unknown_candidates ──────────────────────────────────────────
def test_extract_unknown_candidates(tmp_path):
    """An unseeded capitalized word in the text is reported as a candidate."""
    registry = EntityRegistry.load(config_dir=tmp_path)
    registry.seed(mode="personal", people=[], projects=[])

    sentence = "Saoirse went to the store"
    candidates = registry.extract_unknown_candidates(sentence)
    assert "Saoirse" in candidates
def test_extract_unknown_candidates_skips_known(tmp_path):
    """A name that is already seeded is not reported as an unknown candidate."""
    riley = {"name": "Riley", "relationship": "daughter", "context": "personal"}
    registry = EntityRegistry.load(config_dir=tmp_path)
    registry.seed(mode="personal", people=[riley], projects=[])

    sentence = "Riley went to the store"
    candidates = registry.extract_unknown_candidates(sentence)
    assert "Riley" not in candidates
# ── summary ─────────────────────────────────────────────────────────────
def test_summary(tmp_path):
    """summary() mentions the mode, the seeded person and the seeded project."""
    riley = {"name": "Riley", "relationship": "daughter", "context": "personal"}
    registry = EntityRegistry.load(config_dir=tmp_path)
    registry.seed(mode="personal", people=[riley], projects=["MemPalace"])

    text = registry.summary()
    for fragment in ("personal", "Riley", "MemPalace"):
        assert fragment in text