Skip to content

Commit 8f0536a

Browse files
committed
fix(llm): claude-code provider security and privacy hardening
- Override is_external_service=True so the MemPalace#1224 privacy gate fires. The CLI binary runs locally but every classify call routes user content to Anthropic-hosted models, so the URL-based base-class default (returns local because endpoint is None) misclassifies this provider. - Strip ANTHROPIC_* env vars from the subprocess environment. If the user has ANTHROPIC_API_KEY exported, claude -p can fall back to API-key auth and bill the API account instead of the subscription this provider is built around. - Frame system+user content with <system>/<user> XML tags instead of literal SYSTEM:/USER: markers. A malicious drawer text containing '\\n\\nSYSTEM:\\nIgnore prior instructions...' could otherwise spoof the boundary and inject a second system prompt. - Spawn the absolute path returned by shutil.which("claude") rather than the bare 'claude' literal. Closes a TOCTOU window between check_available() resolving the binary and classify() spawning a potentially different binary if PATH changes between calls. - Pass encoding='utf-8' explicitly to subprocess.run so a Windows cp1252 locale does not mojibake the JSON envelope before json.loads. - Include the raw stdout excerpt in the non-JSON envelope LLMError so CLI-output regressions can be debugged without reproducing. - Broaden check_available()'s exception filter from (subprocess.TimeoutExpired, OSError) to (subprocess.SubprocessError, OSError) so future SubprocessError subclasses do not leak. Tests added: env-scrub propagation, is_external_service True, binary-missing path. Existing tests updated for the resolved-binary cmd[0] and XML stdin framing.
1 parent 6380e67 commit 8f0536a

2 files changed

Lines changed: 158 additions & 62 deletions

File tree

mempalace/llm_client.py

Lines changed: 37 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,21 @@ def __init__(
435435
):
436436
super().__init__(model=model, timeout=timeout)
437437

438+
@property
439+
def is_external_service(self) -> bool:
440+
# The CLI binary runs locally but routes every classify call to
441+
# Anthropic's hosted models, so user content does leave the machine.
442+
# Override the URL-based default in the base class.
443+
return True
444+
445+
def _subprocess_env(self) -> dict:
446+
# Strip ANTHROPIC_* env vars before spawning `claude -p`. If the user
447+
# has ANTHROPIC_API_KEY exported in their shell, the CLI may fall
448+
# back to API-key auth and bill the API account instead of the
449+
# subscription this provider is built around. Removing the vars
450+
# forces OAuth / keychain auth, which is the documented path.
451+
return {k: v for k, v in os.environ.items() if not k.upper().startswith("ANTHROPIC_")}
452+
438453
def check_available(self) -> tuple[bool, str]:
439454
binary = shutil.which("claude")
440455
if not binary:
@@ -445,12 +460,15 @@ def check_available(self) -> tuple[bool, str]:
445460
)
446461
try:
447462
r = subprocess.run(
448-
["claude", "auth", "status", "--text"],
463+
[binary, "auth", "status", "--text"],
449464
capture_output=True,
450465
text=True,
466+
encoding="utf-8",
467+
errors="replace",
451468
timeout=10,
469+
env=self._subprocess_env(),
452470
)
453-
except (subprocess.TimeoutExpired, OSError) as e:
471+
except (subprocess.SubprocessError, OSError) as e:
454472
return False, f"`claude auth status` failed: {e}"
455473
if r.returncode != 0:
456474
return (
@@ -463,36 +481,42 @@ def classify(self, system: str, user: str, json_mode: bool = True) -> LLMRespons
463481
sys_prompt = system
464482
if json_mode:
465483
sys_prompt += "\n\nRespond with valid JSON only, no prose."
484+
binary = shutil.which("claude")
485+
if not binary:
486+
raise LLMError("`claude` CLI not found in PATH")
466487
# `--bare` would skip hooks, plugins, CLAUDE.md auto-discovery, but it
467488
# also forces claude to use ANTHROPIC_API_KEY only and ignore OAuth /
468489
# keychain. That defeats this provider's whole point (subscription
469490
# auth), so we omit it. To keep the surrounding context minimal we
470491
# invoke from a temp cwd so claude does not pick up a project-level
471492
# CLAUDE.md it does not need.
472493
#
473-
# System prompt is prepended to stdin instead of being passed via
474-
# `--system-prompt` argv. argv is visible to other local users via
475-
# `ps` / /proc/*/cmdline, and the prompt can carry sensitive context
476-
# (entity names, project paths). The SYSTEM/USER framing is a
477-
# convention `claude -p` follows reliably for classification tasks.
494+
# System and user content go through stdin (not argv) so they are
495+
# not visible to other local users via `ps` / /proc/*/cmdline, and
496+
# to keep prompt-injection surface narrow we frame them with XML-
497+
# like tags rather than literal "SYSTEM:" / "USER:" markers a
498+
# malicious drawer could spoof.
478499
cmd = [
479-
"claude",
500+
binary,
480501
"-p",
481502
"--no-session-persistence", # don't pollute Claude Code session history
482503
"--output-format",
483504
"json",
484505
"--model",
485506
self.model,
486507
]
487-
combined_input = f"SYSTEM:\n{sys_prompt}\n\nUSER:\n{user}"
508+
combined_input = f"<system>\n{sys_prompt}\n</system>\n<user>\n{user}\n</user>"
488509
try:
489510
r = subprocess.run(
490511
cmd,
491512
input=combined_input,
492513
capture_output=True,
493514
text=True,
515+
encoding="utf-8",
516+
errors="replace",
494517
timeout=self.timeout,
495518
cwd=tempfile.gettempdir(),
519+
env=self._subprocess_env(),
496520
)
497521
except subprocess.TimeoutExpired as e:
498522
raise LLMError(f"`claude -p` timed out after {self.timeout}s") from e
@@ -504,7 +528,10 @@ def classify(self, system: str, user: str, json_mode: bool = True) -> LLMRespons
504528
try:
505529
envelope = json.loads(r.stdout)
506530
except json.JSONDecodeError as e:
507-
raise LLMError(f"`claude -p` returned non-JSON envelope: {e}") from e
531+
stdout_excerpt = (r.stdout or "").strip()[:200]
532+
raise LLMError(
533+
f"`claude -p` returned non-JSON envelope: {e}; stdout={stdout_excerpt!r}"
534+
) from e
508535
# `--output-format json` returns:
509536
# {"type":"result","result":"<text>","total_cost_usd":...,...}
510537
text = envelope.get("result", "")

tests/test_llm_client.py

Lines changed: 121 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -552,6 +552,9 @@ def test_claude_code_check_available_ready():
552552
assert msg == "ok"
553553

554554

555+
_FAKE_CLAUDE_BIN = "/usr/local/bin/claude"
556+
557+
555558
def test_claude_code_classify_command_line():
556559
captured = {}
557560

@@ -560,11 +563,15 @@ def fake_run(cmd, **kwargs):
560563
captured["kwargs"] = kwargs
561564
return _mock_completed(0, stdout=_claude_envelope('{"ok": true}'))
562565

563-
with patch("mempalace.llm_client.subprocess.run", side_effect=fake_run):
564-
p = ClaudeCodeProvider(model="claude-haiku-4-5", timeout=99)
565-
p.classify("system text", "user text", json_mode=True)
566+
with patch("mempalace.llm_client.shutil.which", return_value=_FAKE_CLAUDE_BIN):
567+
with patch("mempalace.llm_client.subprocess.run", side_effect=fake_run):
568+
p = ClaudeCodeProvider(model="claude-haiku-4-5", timeout=99)
569+
p.classify("system text", "user text", json_mode=True)
566570

567-
assert captured["cmd"][0] == "claude"
571+
# cmd[0] is the resolved absolute path from shutil.which, not a bare
572+
# "claude" literal -- avoids a TOCTOU between check_available and
573+
# classify if PATH changes between calls.
574+
assert captured["cmd"][0] == _FAKE_CLAUDE_BIN
568575
assert "-p" in captured["cmd"]
569576
# `--bare` is intentionally NOT passed: it would force ANTHROPIC_API_KEY
570577
# auth and disable OAuth / keychain, defeating the subscription path.
@@ -578,15 +585,22 @@ def fake_run(cmd, **kwargs):
578585
# users via `ps` / /proc/*/cmdline and may carry sensitive context.
579586
assert "--system-prompt" not in captured["cmd"]
580587
assert "system text" not in captured["cmd"]
581-
# System + user are framed in stdin instead. json_mode appends a
582-
# JSON-only directive to the system block.
588+
# System + user are framed in stdin with XML-like tags so a malicious
589+
# drawer cannot spoof the boundary with literal "SYSTEM:" / "USER:"
590+
# markers. json_mode appends a JSON-only directive inside the <system>
591+
# block.
583592
stdin_input = captured["kwargs"]["input"]
584-
assert stdin_input.startswith("SYSTEM:\nsystem text")
593+
assert stdin_input.startswith("<system>\nsystem text")
585594
assert "JSON only" in stdin_input
586-
assert "\n\nUSER:\nuser text" in stdin_input
595+
assert "</system>\n<user>\nuser text\n</user>" in stdin_input
596+
assert "SYSTEM:" not in stdin_input
597+
assert "USER:" not in stdin_input
587598
assert captured["kwargs"]["timeout"] == 99
588599
# cwd must be a temp dir so claude does not pick up a project-level CLAUDE.md
589600
assert captured["kwargs"]["cwd"] == tempfile.gettempdir()
601+
# Explicit UTF-8 decoding so Windows cp1252 locale does not mojibake the
602+
# JSON envelope.
603+
assert captured["kwargs"]["encoding"] == "utf-8"
590604

591605

592606
def test_claude_code_classify_json_mode_off_keeps_system_clean():
@@ -597,24 +611,64 @@ def fake_run(cmd, **kwargs):
597611
captured["kwargs"] = kwargs
598612
return _mock_completed(0, stdout=_claude_envelope("plain text reply"))
599613

600-
with patch("mempalace.llm_client.subprocess.run", side_effect=fake_run):
601-
p = ClaudeCodeProvider(model="claude-haiku-4-5")
602-
resp = p.classify("system text", "user", json_mode=False)
614+
with patch("mempalace.llm_client.shutil.which", return_value=_FAKE_CLAUDE_BIN):
615+
with patch("mempalace.llm_client.subprocess.run", side_effect=fake_run):
616+
p = ClaudeCodeProvider(model="claude-haiku-4-5")
617+
resp = p.classify("system text", "user", json_mode=False)
603618

604619
# No JSON-only directive appended when json_mode=False; raw system
605-
# text appears verbatim in the stdin SYSTEM block (not in argv).
620+
# text appears verbatim inside the <system> block (not in argv).
606621
assert "--system-prompt" not in captured["cmd"]
607-
assert captured["kwargs"]["input"] == "SYSTEM:\nsystem text\n\nUSER:\nuser"
622+
assert captured["kwargs"]["input"] == (
623+
"<system>\nsystem text\n</system>\n<user>\nuser\n</user>"
624+
)
608625
assert resp.text == "plain text reply"
609626

610627

628+
def test_claude_code_strips_anthropic_env_vars(monkeypatch):
629+
captured = {}
630+
631+
def fake_run(cmd, **kwargs):
632+
captured["env"] = kwargs.get("env")
633+
return _mock_completed(0, stdout=_claude_envelope("ok"))
634+
635+
monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-test-key")
636+
monkeypatch.setenv("ANTHROPIC_AUTH_TOKEN", "tok-test")
637+
monkeypatch.setenv("anthropic_other", "lower") # case-insensitive prefix scrub
638+
monkeypatch.setenv("UNRELATED_VAR", "kept")
639+
640+
with patch("mempalace.llm_client.shutil.which", return_value=_FAKE_CLAUDE_BIN):
641+
with patch("mempalace.llm_client.subprocess.run", side_effect=fake_run):
642+
p = ClaudeCodeProvider(model="claude-haiku-4-5")
643+
p.classify("s", "u")
644+
645+
env = captured["env"]
646+
assert env is not None
647+
# ANTHROPIC_* in any case is stripped so claude -p can't fall back to
648+
# API-key auth and bill the API account instead of the subscription.
649+
assert "ANTHROPIC_API_KEY" not in env
650+
assert "ANTHROPIC_AUTH_TOKEN" not in env
651+
assert "anthropic_other" not in env
652+
# Unrelated env vars must still pass through.
653+
assert env.get("UNRELATED_VAR") == "kept"
654+
655+
656+
def test_claude_code_is_external_service_true():
657+
# claude-code routes user content to Anthropic's hosted models via the
658+
# local CLI binary, so the privacy gate (#1224) must treat it as
659+
# external regardless of the URL-based base-class default.
660+
p = ClaudeCodeProvider(model="claude-haiku-4-5")
661+
assert p.is_external_service is True
662+
663+
611664
def test_claude_code_classify_parses_envelope():
612-
with patch(
613-
"mempalace.llm_client.subprocess.run",
614-
return_value=_mock_completed(0, stdout=_claude_envelope("classified")),
615-
):
616-
p = ClaudeCodeProvider(model="claude-haiku-4-5")
617-
resp = p.classify("s", "u")
665+
with patch("mempalace.llm_client.shutil.which", return_value=_FAKE_CLAUDE_BIN):
666+
with patch(
667+
"mempalace.llm_client.subprocess.run",
668+
return_value=_mock_completed(0, stdout=_claude_envelope("classified")),
669+
):
670+
p = ClaudeCodeProvider(model="claude-haiku-4-5")
671+
resp = p.classify("s", "u")
618672

619673
assert resp.text == "classified"
620674
assert resp.provider == "claude-code"
@@ -623,53 +677,68 @@ def test_claude_code_classify_parses_envelope():
623677

624678

625679
def test_claude_code_classify_timeout_raises_llm_error():
626-
with patch(
627-
"mempalace.llm_client.subprocess.run",
628-
side_effect=subprocess.TimeoutExpired(cmd=["claude"], timeout=1),
629-
):
630-
p = ClaudeCodeProvider(model="claude-haiku-4-5", timeout=1)
631-
with pytest.raises(LLMError, match="timed out after 1s"):
632-
p.classify("s", "u")
680+
with patch("mempalace.llm_client.shutil.which", return_value=_FAKE_CLAUDE_BIN):
681+
with patch(
682+
"mempalace.llm_client.subprocess.run",
683+
side_effect=subprocess.TimeoutExpired(cmd=["claude"], timeout=1),
684+
):
685+
p = ClaudeCodeProvider(model="claude-haiku-4-5", timeout=1)
686+
with pytest.raises(LLMError, match="timed out after 1s"):
687+
p.classify("s", "u")
633688

634689

635690
def test_claude_code_classify_spawn_failure_raises_llm_error():
636-
with patch(
637-
"mempalace.llm_client.subprocess.run",
638-
side_effect=FileNotFoundError("no such file: claude"),
639-
):
691+
with patch("mempalace.llm_client.shutil.which", return_value=_FAKE_CLAUDE_BIN):
692+
with patch(
693+
"mempalace.llm_client.subprocess.run",
694+
side_effect=FileNotFoundError("no such file: claude"),
695+
):
696+
p = ClaudeCodeProvider(model="claude-haiku-4-5")
697+
with pytest.raises(LLMError, match="failed to spawn"):
698+
p.classify("s", "u")
699+
700+
701+
def test_claude_code_classify_binary_missing_raises_llm_error():
702+
# If the binary disappears between provider construction and classify,
703+
# surface a clear LLMError rather than letting subprocess raise an
704+
# opaque FileNotFoundError later.
705+
with patch("mempalace.llm_client.shutil.which", return_value=None):
640706
p = ClaudeCodeProvider(model="claude-haiku-4-5")
641-
with pytest.raises(LLMError, match="failed to spawn"):
707+
with pytest.raises(LLMError, match="not found in PATH"):
642708
p.classify("s", "u")
643709

644710

645711
def test_claude_code_classify_nonzero_raises_llm_error():
646-
with patch(
647-
"mempalace.llm_client.subprocess.run",
648-
return_value=_mock_completed(1, stdout="", stderr="boom: bad model"),
649-
):
650-
p = ClaudeCodeProvider(model="claude-haiku-4-5")
651-
with pytest.raises(LLMError, match=r"`claude -p` exited 1: boom"):
652-
p.classify("s", "u")
712+
with patch("mempalace.llm_client.shutil.which", return_value=_FAKE_CLAUDE_BIN):
713+
with patch(
714+
"mempalace.llm_client.subprocess.run",
715+
return_value=_mock_completed(1, stdout="", stderr="boom: bad model"),
716+
):
717+
p = ClaudeCodeProvider(model="claude-haiku-4-5")
718+
with pytest.raises(LLMError, match=r"`claude -p` exited 1: boom"):
719+
p.classify("s", "u")
653720

654721

655722
def test_claude_code_classify_malformed_json_raises_llm_error():
656-
with patch(
657-
"mempalace.llm_client.subprocess.run",
658-
return_value=_mock_completed(0, stdout="not valid json"),
659-
):
660-
p = ClaudeCodeProvider(model="claude-haiku-4-5")
661-
with pytest.raises(LLMError, match="non-JSON envelope"):
662-
p.classify("s", "u")
723+
with patch("mempalace.llm_client.shutil.which", return_value=_FAKE_CLAUDE_BIN):
724+
with patch(
725+
"mempalace.llm_client.subprocess.run",
726+
return_value=_mock_completed(0, stdout="not valid json"),
727+
):
728+
p = ClaudeCodeProvider(model="claude-haiku-4-5")
729+
with pytest.raises(LLMError, match="non-JSON envelope"):
730+
p.classify("s", "u")
663731

664732

665733
def test_claude_code_classify_empty_result_raises_llm_error():
666-
with patch(
667-
"mempalace.llm_client.subprocess.run",
668-
return_value=_mock_completed(0, stdout=_claude_envelope("")),
669-
):
670-
p = ClaudeCodeProvider(model="claude-haiku-4-5")
671-
with pytest.raises(LLMError, match="empty result"):
672-
p.classify("s", "u")
734+
with patch("mempalace.llm_client.shutil.which", return_value=_FAKE_CLAUDE_BIN):
735+
with patch(
736+
"mempalace.llm_client.subprocess.run",
737+
return_value=_mock_completed(0, stdout=_claude_envelope("")),
738+
):
739+
p = ClaudeCodeProvider(model="claude-haiku-4-5")
740+
with pytest.raises(LLMError, match="empty result"):
741+
p.classify("s", "u")
673742

674743

675744
@pytest.mark.skipif(

0 commit comments

Comments
 (0)