diff --git a/mempalace/mcp_server.py b/mempalace/mcp_server.py index f0edfbb1b..c57785ab9 100644 --- a/mempalace/mcp_server.py +++ b/mempalace/mcp_server.py @@ -20,22 +20,47 @@ mempalace_reconnect — force cache invalidation and reconnect after external writes """ -import argparse import os import sys -import json -import logging -import hashlib -import time -from datetime import datetime -from pathlib import Path - -from .config import MempalaceConfig, sanitize_kg_value, sanitize_name, sanitize_content -from .version import __version__ -from .backends.chroma import ChromaBackend, ChromaCollection -from .query_sanitizer import sanitize_query -from .searcher import search_memories -from .palace_graph import ( + +# --- MCP stdio protection (issue #225) ----------------------------------- +# The MCP protocol multiplexes JSON-RPC over stdio: stdout MUST carry only +# valid JSON-RPC messages, stderr is for human-readable logs. Some +# transitive dependencies (chromadb → onnxruntime, posthog telemetry) print +# banners and error messages directly to stdout — sometimes at C level — +# which breaks Claude Desktop's JSON parser. Redirect stdout → stderr at +# both the Python and file-descriptor level before heavy imports, then +# restore the real stdout in main() before entering the protocol loop. +_REAL_STDOUT = sys.stdout +_REAL_STDOUT_FD = None +try: + _REAL_STDOUT_FD = os.dup(1) + os.dup2(2, 1) +except (OSError, AttributeError): + # Environments without fd-level stdio (embedded interpreters, some test + # harnesses). The Python-level redirect below still applies. + pass +sys.stdout = sys.stderr + +import argparse # noqa: E402 (deferred until after stdio protection above) +import json # noqa: E402 +import logging # noqa: E402 +import hashlib # noqa: E402 +import time # noqa: E402 +from datetime import datetime # noqa: E402 +from pathlib import Path # noqa: E402 + +from .config import ( # noqa: E402 + MempalaceConfig, + sanitize_kg_value, + sanitize_name, + sanitize_content, +) +from .version import __version__ # noqa: E402 +from .backends.chroma import ChromaBackend, ChromaCollection # noqa: E402 +from .query_sanitizer import sanitize_query # noqa: E402 +from .searcher import search_memories # noqa: E402 +from .palace_graph import ( # noqa: E402 traverse, find_tunnels, graph_stats, @@ -45,7 +70,7 @@ follow_tunnels, ) -from .knowledge_graph import KnowledgeGraph +from .knowledge_graph import KnowledgeGraph # noqa: E402 logging.basicConfig(level=logging.INFO, format="%(message)s", stream=sys.stderr) logger = logging.getLogger("mempalace_mcp") @@ -1641,7 +1666,21 @@ def handle_request(request): } +def _restore_stdout(): + """Restore real stdout for MCP JSON-RPC output (see issue #225).""" + global _REAL_STDOUT, _REAL_STDOUT_FD + if _REAL_STDOUT_FD is not None: + try: + os.dup2(_REAL_STDOUT_FD, 1) + os.close(_REAL_STDOUT_FD) + except OSError: + pass + _REAL_STDOUT_FD = None + sys.stdout = _REAL_STDOUT + + def main(): + _restore_stdout() logger.info("MemPalace MCP Server starting...") while True: try: diff --git a/tests/test_mcp_stdio_protection.py b/tests/test_mcp_stdio_protection.py new file mode 100644 index 000000000..8b3731bfd --- /dev/null +++ b/tests/test_mcp_stdio_protection.py @@ -0,0 +1,83 @@ +"""Regression tests for issue #225 — MCP stdio protection. + +The MCP protocol multiplexes JSON-RPC over stdio. Stdout MUST carry only +valid JSON-RPC messages. Several transitive deps (chromadb → onnxruntime, +posthog telemetry) print banners and warnings to stdout — sometimes at +the C level — which broke Claude Desktop's JSON parser on Windows. + +The fix in mcp_server.py redirects stdout → stderr at both the Python +and file-descriptor level during module import, then restores the real +stdout in main() before entering the protocol loop. +""" + +import subprocess +import sys +import textwrap + + +def test_module_import_redirects_stdout_to_stderr(): + """At import time, sys.stdout must point at sys.stderr so any stray + print() from a transitive dependency is sent to stderr.""" + code = textwrap.dedent( + """ + import sys + original_stdout = sys.stdout + from mempalace import mcp_server + assert sys.stdout is sys.stderr, ( + f"Expected sys.stdout to be redirected to sys.stderr, " + f"got: {sys.stdout!r}" + ) + assert mcp_server._REAL_STDOUT is original_stdout, ( + "mcp_server._REAL_STDOUT must hold the original stdout" + ) + print("OK", file=sys.stderr) + """ + ) + result = subprocess.run( + [sys.executable, "-c", code], + capture_output=True, + timeout=60, + ) + assert result.returncode == 0, f"stdout: {result.stdout!r}\nstderr: {result.stderr!r}" + + +def test_restore_stdout_returns_real_stdout(): + """_restore_stdout() must reassign sys.stdout to the original handle + so main() can write JSON-RPC responses to the real stdout.""" + code = textwrap.dedent( + """ + import sys + original_stdout = sys.stdout + from mempalace import mcp_server + assert sys.stdout is sys.stderr + mcp_server._restore_stdout() + assert sys.stdout is original_stdout, ( + f"After _restore_stdout(), sys.stdout must be the original; " + f"got: {sys.stdout!r}" + ) + mcp_server._restore_stdout() # idempotent + print("OK", file=sys.stderr) + """ + ) + result = subprocess.run( + [sys.executable, "-c", code], + capture_output=True, + timeout=60, + ) + assert result.returncode == 0, f"stdout: {result.stdout!r}\nstderr: {result.stderr!r}" + + +def test_mcp_server_no_stdout_noise_on_clean_exit(): + """`python -m mempalace.mcp_server` with empty stdin must produce + nothing on stdout. Empty input → readline() returns '' → main() + breaks out cleanly. Any stdout content here would corrupt the + JSON-RPC stream in real use.""" + proc = subprocess.run( + [sys.executable, "-m", "mempalace.mcp_server"], + input=b"", + capture_output=True, + timeout=60, + ) + assert ( + proc.stdout == b"" + ), f"stdout must be empty before the first JSON-RPC response, but got: {proc.stdout!r}"