@@ -1483,6 +1483,14 @@ def test_merge_tier_fields_heuristic_yes_llm_no_keeps_heuristic_bool():
14831483 f"Merged result must KEEP heuristic's True, not flip to False. "
14841484 f"Got: { res } "
14851485 )
1486+ # The bool and the confidence are paired — both must come from the
1487+ # heuristic. The mocked LLM returned 0.90; if the merge accidentally
1488+ # took LLM's confidence, this would equal 0.90.
1489+ assert res ["confidence" ] != 0.90 , (
1490+ f"Merged confidence equals the mocked LLM's 0.90 — looks like "
1491+ f"LLM's confidence leaked through the merge. Heuristic's confidence "
1492+ f"must be preserved alongside its bool. Got: { res } "
1493+ )
14861494 # Persona/user/platform from LLM should still be merged in.
14871495 assert res ["agent_persona_names" ] == [
14881496 "Echo" ,
@@ -1539,6 +1547,13 @@ def test_merge_tier_fields_heuristic_no_no_personas_leak():
15391547 assert (
15401548 res ["agent_persona_names" ] == []
15411549 ), f"No personas should leak when both tiers report none. Got: { res } "
1550+ # Heuristic owns confidence. Mocked LLM returned 0.95; heuristic's
1551+ # narrative-branch confidence is 0.9. Verifying we kept 0.9 catches
1552+ # any future regression that lets LLM confidence override heuristic.
1553+ assert res ["confidence" ] == 0.9 , (
1554+ f"Heuristic confidently classified narrative at 0.9; mocked LLM "
1555+ f"returned 0.95. Merge must keep heuristic's 0.9. Got: { res } "
1556+ )
15421557
15431558
15441559def test_merge_tier_fields_heuristic_yes_llm_yes_combines_evidence ():
@@ -1597,6 +1612,75 @@ def test_merge_tier_fields_heuristic_yes_llm_yes_combines_evidence():
15971612 assert len (res ["evidence" ]) >= 2 , (
15981613 f"Combined evidence should include both heuristic + LLM lines. " f"Got: { res ['evidence' ]} "
15991614 )
1615+ # Each entry must carry its tier prefix so on-disk origin.json is
1616+ # auditable — readers can tell which tier produced which signal line.
1617+ tier1_lines = [e for e in res ["evidence" ] if e .startswith ("Tier-1 heuristic: " )]
1618+ tier2_lines = [e for e in res ["evidence" ] if e .startswith ("Tier-2 LLM: " )]
1619+ assert tier1_lines , (
1620+ f"Expected at least one 'Tier-1 heuristic: ' prefixed evidence line. "
1621+ f"Got: { res ['evidence' ]} "
1622+ )
1623+ assert tier2_lines , (
1624+ f"Expected at least one 'Tier-2 LLM: ' prefixed evidence line. " f"Got: { res ['evidence' ]} "
1625+ )
1626+ # Every entry should be tier-prefixed (no untagged passthrough).
1627+ untagged = [
1628+ e
1629+ for e in res ["evidence" ]
1630+ if not (e .startswith ("Tier-1 heuristic: " ) or e .startswith ("Tier-2 LLM: " ))
1631+ ]
1632+ assert not untagged , f"Untagged evidence entries leaked into merge: { untagged } "
1633+
1634+
def test_merge_tier_fields_confidence_matches_heuristic_call():
    """The merged confidence must be exactly what `detect_origin_heuristic`
    reports for the same samples — regardless of what the LLM tier returned.

    Guards against the regression class where a later refactor lets Tier 2's
    confidence bleed back into the merged result.
    """
    import tempfile
    from unittest.mock import MagicMock, patch

    from mempalace.cli import _run_pass_zero
    from mempalace.corpus_origin import CorpusOriginResult, detect_origin_heuristic

    dialogue_samples = _ai_dialogue_samples()
    expected_confidence = detect_origin_heuristic(dialogue_samples).confidence

    mock_provider = MagicMock()
    # Give the mocked LLM a confidence nothing else would produce, so any
    # leak through the merge is unmistakable.
    decoy_result = CorpusOriginResult(
        likely_ai_dialogue=True,
        confidence=0.123456,
        primary_platform="Claude (Anthropic)",
        user_name=None,
        agent_persona_names=[],
        evidence=["LLM said yes with an unusual confidence"],
    )

    with tempfile.TemporaryDirectory() as workdir:
        src_dir = Path(workdir) / "project"
        src_dir.mkdir()
        store_dir = Path(workdir) / "palace"
        for i, text in enumerate(dialogue_samples):
            (src_dir / f"log{i}.md").write_text(text)

        with patch("mempalace.cli.detect_origin_llm", return_value=decoy_result):
            outcome = _run_pass_zero(
                project_dir=str(src_dir),
                palace_dir=str(store_dir),
                llm_provider=mock_provider,
            )

        assert outcome is not None
        res = outcome["result"]
        assert res["confidence"] == expected_confidence, (
            f"Merged confidence {res['confidence']} did not match "
            f"detect_origin_heuristic's {expected_confidence}. Looks like "
            f"LLM's 0.123456 (or another source) leaked through the merge."
        )
16001684
16011685
16021686def test_merge_tier_fields_no_llm_provider_returns_heuristic_only ():
0 commit comments