Skip to content

Commit 7ede231

Browse files
authored
Merge pull request #1167 from arnoldwender/fix/kg-date-validation
fix(kg): validate ISO-8601 date formats at MCP boundary
2 parents 3824ea6 + abe8576 commit 7ede231

4 files changed

Lines changed: 162 additions & 1 deletion

File tree

mempalace/config.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,38 @@ def sanitize_kg_value(value: str, field_name: str = "value") -> str:
8181
return value
8282

8383

84+
# ISO-8601 date validator for knowledge-graph temporal parameters
85+
# (as_of, valid_from, valid_to, ended). Parameterized queries already
86+
# prevent SQL injection, but unvalidated date strings silently miss
87+
# every row — callers cannot distinguish "no fact at this time" from
88+
# "your date format was unrecognized." Require full YYYY-MM-DD: KG
89+
# queries compare TEXT dates lexicographically, so partials like "2026"
90+
# would re-introduce silent empty results (e.g. "2026-01-01" <= "2026"
91+
# is False), defeating the purpose of validation.
92+
# Shape check for ``YYYY-MM-DD``. ``[0-9]`` is used rather than ``\d``
# because ``\d`` in a ``str`` pattern also matches non-ASCII Unicode digits
# (e.g. Arabic-Indic), which would pass validation yet never compare
# meaningfully against ASCII date text. ``\Z`` anchors at the true end of
# the string (``$`` would also match just before a trailing newline).
_ISO_DATE_RE = re.compile(r"^[0-9]{4}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12][0-9]|3[01])\Z")

# Days per month in a non-leap year, indexed by ``month - 1``.
_DAYS_IN_MONTH = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)


def sanitize_iso_date(value, field_name: str = "date"):
    """Validate an ISO-8601 calendar date string, accepting None or empty as-is.

    Accepts only ``YYYY-MM-DD`` written with ASCII digits, and only when the
    day actually exists in that month (leap years included). Partial dates
    (``YYYY``, ``YYYY-MM``) are rejected because KG queries compare TEXT
    dates lexicographically and would silently exclude valid facts.

    Args:
        value: The candidate date. ``None`` and ``""`` mean "not supplied"
            and are returned unchanged.
        field_name: Parameter name used in error messages so the caller can
            tell which argument was malformed.

    Returns:
        ``value`` unchanged when it is ``None`` or ``""``; otherwise the
        input with surrounding whitespace stripped.

    Raises:
        ValueError: If ``value`` is not a string, does not have the
            ``YYYY-MM-DD`` shape, or names a day that does not exist
            (e.g. ``2026-02-30``).
    """
    if value is None or value == "":
        return value
    if not isinstance(value, str):
        raise ValueError(f"{field_name} must be a string")
    value = value.strip()
    if not _ISO_DATE_RE.match(value):
        raise ValueError(
            f"{field_name}={value!r} is not a valid ISO-8601 date (expected YYYY-MM-DD)"
        )

    # The regex only bounds the day to 01-31; check it against the real
    # month length so impossible dates such as 02-30 or 04-31 are rejected.
    year, month, day = int(value[:4]), int(value[5:7]), int(value[8:])
    is_leap = year % 4 == 0 and (year % 100 != 0 or year % 400 == 0)
    last_day = 29 if month == 2 and is_leap else _DAYS_IN_MONTH[month - 1]
    if day > last_day:
        raise ValueError(
            f"{field_name}={value!r} is not a valid ISO-8601 date "
            f"(month {month:02d} of {year:04d} has only {last_day} days)"
        )
    return value
114+
115+
84116
def sanitize_content(value: str, max_length: int = 100_000) -> str:
85117
"""Validate drawer/diary content length."""
86118
if not isinstance(value, str) or not value.strip():

mempalace/mcp_server.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
sanitize_kg_value,
5656
sanitize_name,
5757
sanitize_content,
58+
sanitize_iso_date,
5859
)
5960
from .version import __version__ # noqa: E402
6061
from chromadb.errors import NotFoundError as _ChromaNotFoundError # noqa: E402
@@ -1059,6 +1060,7 @@ def tool_kg_query(entity: str, as_of: str = None, direction: str = "both"):
10591060
"""Query the knowledge graph for an entity's relationships."""
10601061
try:
10611062
entity = sanitize_kg_value(entity, "entity")
1063+
as_of = sanitize_iso_date(as_of, "as_of")
10621064
except ValueError as e:
10631065
return {"error": str(e)}
10641066
if direction not in ("outgoing", "incoming", "both"):
@@ -1092,6 +1094,7 @@ def tool_kg_add(
10921094
subject = sanitize_kg_value(subject, "subject")
10931095
predicate = sanitize_name(predicate, "predicate")
10941096
object = sanitize_kg_value(object, "object")
1097+
valid_from = sanitize_iso_date(valid_from, "valid_from")
10951098
except ValueError as e:
10961099
return {"success": False, "error": str(e)}
10971100

@@ -1135,6 +1138,7 @@ def tool_kg_invalidate(subject: str, predicate: str, object: str, ended: str = N
11351138
subject = sanitize_kg_value(subject, "subject")
11361139
predicate = sanitize_name(predicate, "predicate")
11371140
object = sanitize_kg_value(object, "object")
1141+
ended = sanitize_iso_date(ended, "ended")
11381142
except ValueError as e:
11391143
return {"success": False, "error": str(e)}
11401144
resolved_ended = ended or date.today().isoformat()

tests/test_config.py

Lines changed: 73 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,13 @@
33
import tempfile
44

55
import pytest
6-
from mempalace.config import MempalaceConfig, normalize_wing_name, sanitize_kg_value, sanitize_name
6+
from mempalace.config import (
7+
MempalaceConfig,
8+
normalize_wing_name,
9+
sanitize_iso_date,
10+
sanitize_kg_value,
11+
sanitize_name,
12+
)
713

814

915
def test_default_config():
@@ -212,3 +218,69 @@ def test_kg_value_rejects_null_bytes():
212218
def test_kg_value_rejects_over_length():
213219
with pytest.raises(ValueError):
214220
sanitize_kg_value("a" * 129)
221+
222+
223+
# --- sanitize_iso_date ---
224+
225+
226+
def test_iso_date_rejects_year_only():
    """A bare four-digit year must be refused."""
    # KG queries compare TEXT dates lexicographically, so a partial date
    # would silently yield empty results ("2026-01-01" <= "2026" is False).
    year_only = "2026"
    with pytest.raises(ValueError):
        sanitize_iso_date(year_only)
231+
232+
233+
def test_iso_date_rejects_year_month():
    """A ``YYYY-MM`` value without a day component must be refused."""
    year_month = "2026-03"
    with pytest.raises(ValueError):
        sanitize_iso_date(year_month)
236+
237+
238+
def test_iso_date_accepts_full_date():
    """A complete ``YYYY-MM-DD`` string comes back unchanged."""
    returned = sanitize_iso_date("2026-03-15")
    assert returned == "2026-03-15"
240+
241+
242+
def test_iso_date_passes_through_none():
    """``None`` (date not supplied) is returned as ``None``."""
    returned = sanitize_iso_date(None)
    assert returned is None
244+
245+
246+
def test_iso_date_passes_through_empty_string():
    """An empty string is returned as an empty string, not rejected."""
    returned = sanitize_iso_date("")
    assert returned == ""
248+
249+
250+
def test_iso_date_strips_whitespace():
    """Surrounding spaces are removed from an otherwise valid date."""
    padded = " 2026-03-15 "
    assert sanitize_iso_date(padded) == "2026-03-15"
252+
253+
254+
def test_iso_date_rejects_natural_language():
    """A spelled-out month and year is not an accepted date format."""
    spelled_out = "March 2026"
    with pytest.raises(ValueError):
        sanitize_iso_date(spelled_out)
257+
258+
259+
def test_iso_date_rejects_abbreviated_month():
    """An abbreviated month name with a year is not an accepted date format."""
    abbreviated = "Jan 2025"
    with pytest.raises(ValueError):
        sanitize_iso_date(abbreviated)
262+
263+
264+
def test_iso_date_rejects_us_format():
    """A slash-separated ``MM/DD/YYYY`` value is not an accepted date format."""
    us_style = "03/15/2026"
    with pytest.raises(ValueError):
        sanitize_iso_date(us_style)
267+
268+
269+
def test_iso_date_rejects_invalid_month():
    """Full-shape dates whose month is outside 01-12 must be refused."""
    # Use complete YYYY-MM-DD values: a partial input such as "2026-13" is
    # rejected merely for lacking a day component, which would leave the
    # month-range check itself untested.
    for out_of_range in ("2026-13-01", "2026-00-01"):
        with pytest.raises(ValueError):
            sanitize_iso_date(out_of_range)
272+
273+
274+
def test_iso_date_rejects_invalid_day():
    """A day number above 31 must be refused."""
    day_too_large = "2026-02-32"
    with pytest.raises(ValueError):
        sanitize_iso_date(day_too_large)
277+
278+
279+
def test_iso_date_rejects_non_string():
    """An integer that merely looks like a date must be refused."""
    as_integer = 20260315
    with pytest.raises(ValueError):
        sanitize_iso_date(as_integer)
282+
283+
284+
def test_iso_date_error_names_field():
    """The error message mentions the field name passed by the caller."""
    with pytest.raises(ValueError) as raised:
        sanitize_iso_date("yesterday", "valid_from")
    assert "valid_from" in str(raised.value)

tests/test_mcp_server.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -788,6 +788,59 @@ def test_kg_stats(self, monkeypatch, config, palace_path, seeded_kg):
788788
result = tool_kg_stats()
789789
assert result["entities"] >= 4
790790

791+
# --- Date validation at the MCP boundary (issue #1164) ---
792+
793+
def test_kg_add_rejects_invalid_valid_from(self, monkeypatch, config, palace_path, kg):
    """tool_kg_add reports a failure when ``valid_from`` is not ISO-8601."""
    _patch_mcp_server(monkeypatch, config, kg)
    from mempalace.mcp_server import tool_kg_add

    triple = {"subject": "Alice", "predicate": "likes", "object": "coffee"}
    result = tool_kg_add(valid_from="Jan 2025", **triple)

    assert result["success"] is False
    # The message must name the offending parameter and the expected format.
    message = result["error"]
    assert "valid_from" in message
    assert "ISO-8601" in message
806+
807+
def test_kg_query_rejects_invalid_as_of(self, monkeypatch, config, palace_path, seeded_kg):
    """tool_kg_query returns an error naming ``as_of`` for a non-ISO date."""
    _patch_mcp_server(monkeypatch, config, seeded_kg)
    from mempalace.mcp_server import tool_kg_query

    query_args = {"entity": "Max", "as_of": "March 2026"}
    result = tool_kg_query(**query_args)

    assert "error" in result
    assert "as_of" in result["error"]
814+
815+
def test_kg_invalidate_rejects_invalid_ended(self, monkeypatch, config, palace_path, seeded_kg):
    """tool_kg_invalidate reports a failure when ``ended`` is not ISO-8601."""
    _patch_mcp_server(monkeypatch, config, seeded_kg)
    from mempalace.mcp_server import tool_kg_invalidate

    triple = {"subject": "Max", "predicate": "does", "object": "chess"}
    result = tool_kg_invalidate(ended="yesterday", **triple)

    assert result["success"] is False
    assert "ended" in result["error"]
827+
828+
def test_kg_query_rejects_partial_iso_dates(self, monkeypatch, config, palace_path, seeded_kg):
    """tool_kg_query refuses ``YYYY`` / ``YYYY-MM`` but accepts ``YYYY-MM-DD``."""
    _patch_mcp_server(monkeypatch, config, seeded_kg)
    from mempalace.mcp_server import tool_kg_query

    # KG queries compare TEXT dates lexicographically, so a partial date
    # would silently exclude facts ("2026-01-01" <= "2026" is False). They
    # must be rejected at the boundary.
    partial_dates = ("2026", "2026-03")
    for value in partial_dates:
        result = tool_kg_query(entity="Max", as_of=value)
        assert "error" in result, f"accepted partial date {value!r}: {result}"

    # A complete ISO-8601 date is still accepted.
    result = tool_kg_query(entity="Max", as_of="2026-03-15")
    assert "error" not in result, f"rejected valid date: {result}"
843+
791844

792845
# ── Diary Tools ─────────────────────────────────────────────────────────
793846

0 commit comments

Comments
 (0)