Skip to content

Commit 72cbfb5

Browse files
feat(privacy): blocking consent gate for env-fallback LLM API keys
Adds api_key_source provenance ('flag' | 'env' | None) to LLMProvider so cmd_init can distinguish a key passed via --llm-api-key (explicit opt-in) from one silently picked up via OPENAI_API_KEY / ANTHROPIC_API_KEY shell env (stray credential). When the endpoint is external AND api_key_source == 'env', init now prints a blocking [y/N] prompt before any data is sent. Anything other than 'y' drops the LLM and falls back to heuristics-only. Adds --accept-external-llm flag for CI / non-interactive bypass. Closes the UX gap in #1224: the URL-based warning was informational and init kept running, so a user who didn't notice the line had already leaked data. The consent prompt is the actual gate; explicit flag-passed keys remain treated as already-consented.
1 parent 899a5ec commit 72cbfb5

4 files changed

Lines changed: 316 additions & 4 deletions

File tree

mempalace/cli.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,33 @@ def cmd_init(args):
276276
f"retains, or uses your data. Pass --no-llm to keep "
277277
f"init fully local."
278278
)
279+
# Consent gate (issue #26): block init when the api_key
280+
# was acquired via env-fallback (stray credential in
281+
# shell env). Explicit --llm-api-key (api_key_source ==
282+
# "flag") means the user already opted in.
283+
# --accept-external-llm bypasses for CI / non-interactive.
284+
api_key_source = getattr(candidate, "api_key_source", None)
285+
accept_flag = getattr(args, "accept_external_llm", False)
286+
if api_key_source == "env" and not accept_flag:
287+
try:
288+
answer = (
289+
input(
290+
" Your API key was loaded from the environment "
291+
"(not passed via --llm-api-key). Continue with "
292+
"external LLM? [y/N] "
293+
)
294+
.strip()
295+
.lower()
296+
)
297+
except EOFError:
298+
answer = ""
299+
if answer != "y":
300+
print(
301+
" Declined — falling back to heuristics-only. "
302+
"Pass --llm-api-key explicitly or "
303+
"--accept-external-llm to skip this prompt."
304+
)
305+
llm_provider = None
279306
else:
280307
print(
281308
f" No LLM provider reachable ({msg}). "
@@ -971,6 +998,16 @@ def main():
971998
"for openai-compat, defaults to $OPENAI_API_KEY."
972999
),
9731000
)
1001+
p_init.add_argument(
1002+
"--accept-external-llm",
1003+
action="store_true",
1004+
help=(
1005+
"Bypass the interactive consent prompt that fires when an external "
1006+
"LLM is configured via an environment-variable API key (issue #26). "
1007+
"Use this in CI / non-interactive runs where you've already decided "
1008+
"the external send is acceptable."
1009+
),
1010+
)
9741011

9751012
# mine
9761013
p_mine = sub.add_parser("mine", help="Mine files into the palace")

mempalace/llm_client.py

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -127,11 +127,18 @@ def __init__(
127127
endpoint: Optional[str] = None,
128128
api_key: Optional[str] = None,
129129
timeout: int = 120,
130+
api_key_source: Optional[str] = None,
130131
):
131132
self.model = model
132133
self.endpoint = endpoint
133134
self.api_key = api_key
134135
self.timeout = timeout
136+
# Provenance of api_key (issue #26): "flag" when the constructor
137+
# received an explicit api_key arg, "env" when it fell back to an
138+
# environment variable, None when no key is in play. cmd_init
139+
# uses this to gate the consent prompt — stray env-resolved keys
140+
# require explicit user confirmation.
141+
self.api_key_source = api_key_source
135142

136143
def classify(self, system: str, user: str, json_mode: bool = True) -> LLMResponse:
137144
raise NotImplementedError
@@ -253,8 +260,20 @@ def __init__(
253260
timeout: int = 120,
254261
**_: object,
255262
):
256-
resolved_key = api_key or os.environ.get("OPENAI_API_KEY")
257-
super().__init__(model=model, endpoint=endpoint, api_key=resolved_key, timeout=timeout)
263+
if api_key:
264+
resolved_key = api_key
265+
source: Optional[str] = "flag"
266+
else:
267+
env_key = os.environ.get("OPENAI_API_KEY")
268+
resolved_key = env_key or None
269+
source = "env" if env_key else None
270+
super().__init__(
271+
model=model,
272+
endpoint=endpoint,
273+
api_key=resolved_key,
274+
timeout=timeout,
275+
api_key_source=source,
276+
)
258277

259278
def _resolve_url(self) -> str:
260279
if not self.endpoint:
@@ -321,12 +340,19 @@ def __init__(
321340
timeout: int = 120,
322341
**_: object,
323342
):
324-
key = api_key or os.environ.get("ANTHROPIC_API_KEY")
343+
if api_key:
344+
resolved_key = api_key
345+
source: Optional[str] = "flag"
346+
else:
347+
env_key = os.environ.get("ANTHROPIC_API_KEY")
348+
resolved_key = env_key or None
349+
source = "env" if env_key else None
325350
super().__init__(
326351
model=model,
327352
endpoint=endpoint or self.DEFAULT_ENDPOINT,
328-
api_key=key,
353+
api_key=resolved_key,
329354
timeout=timeout,
355+
api_key_source=source,
330356
)
331357

332358
def check_available(self) -> tuple[bool, str]:

tests/test_corpus_origin_integration.py

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1830,3 +1830,195 @@ def test_init_no_privacy_warning_with_no_llm_flag(ai_dialogue_corpus: Path, tmp_
18301830
assert (
18311831
"EXTERNAL API" not in out
18321832
), f"Privacy warning fired on --no-llm path — should not have. Got: {out!r}"
1833+
1834+
1835+
# ─────────────────────────────────────────────────────────────────────────
1836+
# Consent gate for stray env-fallback API keys (issue #26).
1837+
#
1838+
# The #1224 warning is informational — init keeps going. That's
1839+
# "warning theater" if a user wasn't paying attention. #26 adds a
1840+
# blocking [y/N] prompt when the api_key was acquired via env fallback
1841+
# (OPENAI_API_KEY / ANTHROPIC_API_KEY) AND the endpoint is external.
1842+
# Explicit --llm-api-key (api_key_source == "flag") = user opted in.
1843+
# --accept-external-llm bypasses for CI / non-interactive.
1844+
# ─────────────────────────────────────────────────────────────────────────
1845+
1846+
1847+
def _external_env_provider():
1848+
"""Build a fake provider matching the 'stray env-fallback API key
1849+
pointed at external endpoint' scenario — the case #26 must gate."""
1850+
p = MagicMock()
1851+
p.check_available.return_value = (True, "ok")
1852+
p.is_external_service = True
1853+
p.api_key_source = "env"
1854+
p.classify.return_value = MagicMock(text='{"classifications": []}')
1855+
return p
1856+
1857+
1858+
def test_init_blocks_with_consent_prompt_when_api_key_from_env(
1859+
ai_dialogue_corpus: Path, tmp_path: Path, capsys
1860+
):
1861+
"""When provider is external AND api_key_source=='env' AND
1862+
--accept-external-llm is NOT set, cmd_init MUST call input() to
1863+
block on user consent. No bypass = blocking prompt."""
1864+
from mempalace.cli import cmd_init
1865+
1866+
palace = tmp_path / "palace"
1867+
args = _init_args(ai_dialogue_corpus)
1868+
fake_provider = _external_env_provider()
1869+
1870+
with (
1871+
patch("mempalace.cli.MempalaceConfig", return_value=_stub_cfg(palace)),
1872+
patch("mempalace.cli.get_provider", return_value=fake_provider),
1873+
patch("mempalace.cli._maybe_run_mine_after_init"),
1874+
patch("mempalace.room_detector_local.detect_rooms_local"),
1875+
patch("builtins.input", return_value="y") as mock_input,
1876+
):
1877+
cmd_init(args)
1878+
1879+
assert mock_input.called, (
1880+
"Stray env-fallback api_key + external endpoint MUST trigger a "
1881+
"blocking consent prompt. input() was never called."
1882+
)
1883+
1884+
1885+
def test_init_consent_prompt_y_proceeds_with_llm(ai_dialogue_corpus: Path, tmp_path: Path, capsys):
1886+
"""If user types 'y' at the consent prompt, init proceeds with the
1887+
LLM — provider.classify() is invoked during Pass 0 / refinement."""
1888+
from mempalace.cli import cmd_init
1889+
1890+
palace = tmp_path / "palace"
1891+
args = _init_args(ai_dialogue_corpus)
1892+
fake_provider = _external_env_provider()
1893+
1894+
with (
1895+
patch("mempalace.cli.MempalaceConfig", return_value=_stub_cfg(palace)),
1896+
patch("mempalace.cli.get_provider", return_value=fake_provider),
1897+
patch("mempalace.cli._maybe_run_mine_after_init"),
1898+
patch("mempalace.room_detector_local.detect_rooms_local"),
1899+
patch("builtins.input", return_value="y"),
1900+
):
1901+
cmd_init(args)
1902+
1903+
assert fake_provider.classify.called, (
1904+
"After 'y' consent, the LLM provider must be used. "
1905+
"classify() was never called — gate dropped llm_provider on the floor."
1906+
)
1907+
1908+
1909+
def test_init_consent_prompt_n_falls_back_to_heuristic(
1910+
ai_dialogue_corpus: Path, tmp_path: Path, capsys
1911+
):
1912+
"""If user types 'n' (or anything not 'y'), init drops the LLM and
1913+
falls back to heuristics-only — provider.classify() must NOT run."""
1914+
from mempalace.cli import cmd_init
1915+
1916+
palace = tmp_path / "palace"
1917+
args = _init_args(ai_dialogue_corpus)
1918+
fake_provider = _external_env_provider()
1919+
1920+
with (
1921+
patch("mempalace.cli.MempalaceConfig", return_value=_stub_cfg(palace)),
1922+
patch("mempalace.cli.get_provider", return_value=fake_provider),
1923+
patch("mempalace.cli._maybe_run_mine_after_init"),
1924+
patch("mempalace.room_detector_local.detect_rooms_local"),
1925+
patch("builtins.input", return_value="n"),
1926+
):
1927+
cmd_init(args)
1928+
1929+
assert not fake_provider.classify.called, (
1930+
"Declined consent ('n') must drop the provider — classify() "
1931+
"should never be invoked when the user said no."
1932+
)
1933+
1934+
1935+
def test_init_no_consent_prompt_when_api_key_from_flag(
1936+
ai_dialogue_corpus: Path, tmp_path: Path, capsys
1937+
):
1938+
"""Explicit --llm-api-key means user already opted in. The consent
1939+
prompt MUST NOT fire when api_key_source == 'flag', even if the
1940+
endpoint is external."""
1941+
from mempalace.cli import cmd_init
1942+
1943+
palace = tmp_path / "palace"
1944+
args = _init_args(ai_dialogue_corpus, llm_api_key="sk-explicit")
1945+
fake_provider = MagicMock()
1946+
fake_provider.check_available.return_value = (True, "ok")
1947+
fake_provider.is_external_service = True
1948+
fake_provider.api_key_source = "flag" # explicit flag = no gate
1949+
fake_provider.classify.return_value = MagicMock(text='{"classifications": []}')
1950+
1951+
with (
1952+
patch("mempalace.cli.MempalaceConfig", return_value=_stub_cfg(palace)),
1953+
patch("mempalace.cli.get_provider", return_value=fake_provider),
1954+
patch("mempalace.cli._maybe_run_mine_after_init"),
1955+
patch("mempalace.room_detector_local.detect_rooms_local"),
1956+
patch("builtins.input") as mock_input,
1957+
):
1958+
cmd_init(args)
1959+
1960+
assert not mock_input.called, (
1961+
"Explicit --llm-api-key (api_key_source='flag') must NOT trigger "
1962+
"the consent prompt. User already opted in by passing the flag."
1963+
)
1964+
1965+
1966+
def test_init_accept_external_llm_flag_bypasses_consent_prompt(
1967+
ai_dialogue_corpus: Path, tmp_path: Path, capsys
1968+
):
1969+
"""--accept-external-llm is the non-interactive bypass for CI. With
1970+
the flag set, the consent prompt MUST NOT fire even when the
1971+
api_key came from env-fallback."""
1972+
from mempalace.cli import cmd_init
1973+
1974+
palace = tmp_path / "palace"
1975+
args = _init_args(ai_dialogue_corpus, accept_external_llm=True)
1976+
fake_provider = _external_env_provider()
1977+
1978+
with (
1979+
patch("mempalace.cli.MempalaceConfig", return_value=_stub_cfg(palace)),
1980+
patch("mempalace.cli.get_provider", return_value=fake_provider),
1981+
patch("mempalace.cli._maybe_run_mine_after_init"),
1982+
patch("mempalace.room_detector_local.detect_rooms_local"),
1983+
patch("builtins.input") as mock_input,
1984+
):
1985+
cmd_init(args)
1986+
1987+
assert not mock_input.called, (
1988+
"--accept-external-llm must bypass the consent prompt for "
1989+
"non-interactive / CI use. input() was called anyway."
1990+
)
1991+
assert (
1992+
fake_provider.classify.called
1993+
), "With --accept-external-llm, init must proceed with the LLM."
1994+
1995+
1996+
def test_init_no_consent_prompt_when_endpoint_is_local(
1997+
ai_dialogue_corpus: Path, tmp_path: Path, capsys
1998+
):
1999+
"""Stray env-fallback api_key on a LOCAL endpoint (e.g. LM Studio
2000+
on localhost with OPENAI_API_KEY in shell env) must NOT trigger the
2001+
prompt. Nothing leaves the machine — no consent needed."""
2002+
from mempalace.cli import cmd_init
2003+
2004+
palace = tmp_path / "palace"
2005+
args = _init_args(ai_dialogue_corpus)
2006+
fake_provider = MagicMock()
2007+
fake_provider.check_available.return_value = (True, "ok")
2008+
fake_provider.is_external_service = False # localhost / LAN — no leak
2009+
fake_provider.api_key_source = "env" # stray key, but URL is local
2010+
fake_provider.classify.return_value = MagicMock(text='{"classifications": []}')
2011+
2012+
with (
2013+
patch("mempalace.cli.MempalaceConfig", return_value=_stub_cfg(palace)),
2014+
patch("mempalace.cli.get_provider", return_value=fake_provider),
2015+
patch("mempalace.cli._maybe_run_mine_after_init"),
2016+
patch("mempalace.room_detector_local.detect_rooms_local"),
2017+
patch("builtins.input") as mock_input,
2018+
):
2019+
cmd_init(args)
2020+
2021+
assert not mock_input.called, (
2022+
"Local endpoint (is_external_service=False) must NOT trigger the "
2023+
"consent prompt regardless of api_key_source. Nothing leaves the box."
2024+
)

tests/test_llm_client.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -426,3 +426,60 @@ def test_openai_compat_provider_outside_tailscale_cgnat_is_external():
426426
f"Address {endpoint} ({label}) is OUTSIDE Tailscale CGNAT and "
427427
f"should remain external; got is_external_service={p.is_external_service}"
428428
)
429+
430+
431+
# ── api_key_source provenance tracking (issue #26) ──────────────────────
432+
#
433+
# Distinguishes whether `api_key` was set via explicit constructor arg
434+
# (= --llm-api-key flag → "flag") vs via environment-variable fallback
435+
# (OPENAI_API_KEY / ANTHROPIC_API_KEY → "env"). cmd_init uses this to
436+
# decide whether to block on a consent prompt: stray env-fallback keys
437+
# require explicit user confirmation; explicit flag-passed keys are
438+
# treated as already-consented.
439+
440+
441+
def test_openai_compat_api_key_source_flag_when_explicit(monkeypatch):
442+
"""When ``api_key`` is passed explicitly to the constructor, the
443+
provider records ``api_key_source == "flag"`` even if the same env
444+
var is also set. Flag wins over env."""
445+
monkeypatch.setenv("OPENAI_API_KEY", "sk-from-env-irrelevant")
446+
p = OpenAICompatProvider(model="x", endpoint="http://h", api_key="sk-from-flag")
447+
assert p.api_key == "sk-from-flag"
448+
assert (
449+
p.api_key_source == "flag"
450+
), f"Explicit api_key arg must produce api_key_source='flag'; got {p.api_key_source!r}"
451+
452+
453+
def test_openai_compat_api_key_source_env_when_fallback(monkeypatch):
454+
"""When ``api_key`` arg is None but ``OPENAI_API_KEY`` is set, the
455+
provider falls back to env and records ``api_key_source == "env"``.
456+
This is the "stray key" case — user didn't explicitly authorize this
457+
run to use the env-resolved credential."""
458+
monkeypatch.setenv("OPENAI_API_KEY", "sk-from-env")
459+
p = OpenAICompatProvider(model="x", endpoint="http://h")
460+
assert p.api_key == "sk-from-env"
461+
assert (
462+
p.api_key_source == "env"
463+
), f"Env-fallback api_key must produce api_key_source='env'; got {p.api_key_source!r}"
464+
465+
466+
def test_anthropic_api_key_source_tracking(monkeypatch):
467+
"""AnthropicProvider tracks api_key_source the same way: 'flag' when
468+
passed explicitly, 'env' when resolved from ANTHROPIC_API_KEY env."""
469+
monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-env")
470+
p_flag = AnthropicProvider(model="claude-haiku", api_key="sk-ant-flag")
471+
assert (
472+
p_flag.api_key_source == "flag"
473+
), f"Explicit api_key must produce 'flag'; got {p_flag.api_key_source!r}"
474+
p_env = AnthropicProvider(model="claude-haiku")
475+
assert p_env.api_key == "sk-ant-env"
476+
assert (
477+
p_env.api_key_source == "env"
478+
), f"Env-fallback must produce 'env'; got {p_env.api_key_source!r}"
479+
480+
481+
def test_ollama_api_key_source_is_none():
482+
"""Ollama doesn't use api_key at all; ``api_key_source`` should be None."""
483+
p = OllamaProvider(model="gemma4:e4b")
484+
assert p.api_key is None
485+
assert p.api_key_source is None

0 commit comments

Comments
 (0)