diff --git a/hooks/mempal_precompact_hook.sh b/hooks/mempal_precompact_hook.sh index 3a74ca54e..a05798dfb 100755 --- a/hooks/mempal_precompact_hook.sh +++ b/hooks/mempal_precompact_hook.sh @@ -57,7 +57,14 @@ MEMPAL_DIR="" # Read JSON input from stdin INPUT=$(cat) -SESSION_ID=$(echo "$INPUT" | python3 -c "import sys,json; print(json.load(sys.stdin).get('session_id','unknown'))" 2>/dev/null) +# Parse session_id with sanitization (match save hook's safe() pattern) +SESSION_ID=$(echo "$INPUT" | python3 -c " +import sys, json, re +data = json.load(sys.stdin) +sid = re.sub(r'[^a-zA-Z0-9_\-]', '', str(data.get('session_id', 'unknown'))) +print(sid or 'unknown') +" 2>/dev/null) +[ -z "$SESSION_ID" ] && SESSION_ID="unknown" echo "[$(date '+%H:%M:%S')] PRE-COMPACT triggered for session $SESSION_ID" >> "$STATE_DIR/hook.log" diff --git a/mempalace/split_mega_files.py b/mempalace/split_mega_files.py index c5b8d5dcd..6b702f80b 100644 --- a/mempalace/split_mega_files.py +++ b/mempalace/split_mega_files.py @@ -141,7 +141,7 @@ def extract_people(lines): # Speaker tags: "Alice:", "Ben:", etc. for person in KNOWN_PEOPLE: - if re.search(rf"\b{person}\b", text, re.IGNORECASE): + if re.search(rf"\b{re.escape(person)}\b", text, re.IGNORECASE): found.add(person) # Working directory username hint — map to known people if configured diff --git a/tests/test_split_mega_files.py b/tests/test_split_mega_files.py index c1db02bff..020a49a7c 100644 --- a/tests/test_split_mega_files.py +++ b/tests/test_split_mega_files.py @@ -51,6 +51,30 @@ def test_extract_people_detects_names_from_content(monkeypatch): assert people == ["Alice", "Ben"] +def test_extract_people_escapes_regex_metacharacters(monkeypatch): + """Names with regex metacharacters must not crash or false-match.""" + # Dr. Smith: dot must match literal dot, not any character + monkeypatch.setattr(smf, "KNOWN_PEOPLE", ["Dr. Smith"]) + smf._KNOWN_NAMES_CACHE = None + + lines_exact = ["Talked with Dr. Smith about plans\n"] + assert "Dr. Smith" in smf.extract_people(lines_exact) + + lines_false = ["Talked with Dr_ Smith about plans\n"] + assert "Dr. Smith" not in smf.extract_people(lines_false) + + +def test_extract_people_no_crash_on_regex_metacharacters(monkeypatch): + """Names containing brackets, parens, or plus signs must not crash.""" + dangerous_names = ["C++", "Mary (Smith)", "[Admin]", "A*B"] + monkeypatch.setattr(smf, "KNOWN_PEOPLE", dangerous_names) + smf._KNOWN_NAMES_CACHE = None + + # Should not raise re.error — just run without crashing + result = smf.extract_people(["Some text with C++ mentioned\n"]) + assert isinstance(result, list) + + # ── Config: force_reload and invalid JSON ──────────────────────────────