@@ -1483,6 +1483,19 @@ def test_merge_tier_fields_heuristic_yes_llm_no_keeps_heuristic_bool():
14831483 f"Merged result must KEEP heuristic's True, not flip to False. "
14841484 f"Got: { res } "
14851485 )
1486+ # The bool and the confidence are paired — both must come from the
1487+ # heuristic. Compare to detect_origin_heuristic on the same samples
1488+ # so this stays correct regardless of what the heuristic computes
1489+ # for these samples (avoids brittleness vs. a hardcoded sentinel).
1490+ from mempalace .corpus_origin import detect_origin_heuristic
1491+
1492+ expected_confidence = detect_origin_heuristic (_ai_dialogue_samples ()).confidence
1493+ assert res ["confidence" ] == expected_confidence , (
1494+ f"Merged confidence { res ['confidence' ]} did not match the heuristic's "
1495+ f"{ expected_confidence } for these samples. The mocked LLM returned "
1496+ f"0.90; if the merge accidentally took the LLM's confidence, the "
1497+ f"merged value would not equal the heuristic's. Got: { res } "
1498+ )
14861499 # Persona/user/platform from LLM should still be merged in.
14871500 assert res ["agent_persona_names" ] == [
14881501 "Echo" ,
@@ -1539,6 +1552,13 @@ def test_merge_tier_fields_heuristic_no_no_personas_leak():
15391552 assert (
15401553 res ["agent_persona_names" ] == []
15411554 ), f"No personas should leak when both tiers report none. Got: { res } "
1555+ # Heuristic owns confidence. Mocked LLM returned 0.95; heuristic's
1556+ # narrative-branch confidence is 0.9. Verifying we kept 0.9 catches
1557+ # any future regression that lets LLM confidence override heuristic.
1558+ assert res ["confidence" ] == 0.9 , (
1559+ f"Heuristic confidently classified narrative at 0.9; mocked LLM "
1560+ f"returned 0.95. Merge must keep heuristic's 0.9. Got: { res } "
1561+ )
15421562
15431563
15441564def test_merge_tier_fields_heuristic_yes_llm_yes_combines_evidence ():
@@ -1597,6 +1617,75 @@ def test_merge_tier_fields_heuristic_yes_llm_yes_combines_evidence():
15971617 assert len (res ["evidence" ]) >= 2 , (
15981618 f"Combined evidence should include both heuristic + LLM lines. " f"Got: { res ['evidence' ]} "
15991619 )
1620+ # Each entry must carry its tier prefix so on-disk origin.json is
1621+ # auditable — readers can tell which tier produced which signal line.
1622+ tier1_lines = [e for e in res ["evidence" ] if e .startswith ("Tier-1 heuristic: " )]
1623+ tier2_lines = [e for e in res ["evidence" ] if e .startswith ("Tier-2 LLM: " )]
1624+ assert tier1_lines , (
1625+ f"Expected at least one 'Tier-1 heuristic: ' prefixed evidence line. "
1626+ f"Got: { res ['evidence' ]} "
1627+ )
1628+ assert tier2_lines , (
1629+ f"Expected at least one 'Tier-2 LLM: ' prefixed evidence line. " f"Got: { res ['evidence' ]} "
1630+ )
1631+ # Every entry should be tier-prefixed (no untagged passthrough).
1632+ untagged = [
1633+ e
1634+ for e in res ["evidence" ]
1635+ if not (e .startswith ("Tier-1 heuristic: " ) or e .startswith ("Tier-2 LLM: " ))
1636+ ]
1637+ assert not untagged , f"Untagged evidence entries leaked into merge: { untagged } "
1638+
1639+
def test_merge_tier_fields_confidence_matches_heuristic_call():
    """Pin the contract: merged confidence equals what `detect_origin_heuristic`
    returns for the same samples — independent of what the LLM produced.

    Catches a regression class where some future refactor lets Tier 2's
    confidence creep back into the merged result.
    """
    # All imports grouped at the top of the test (tempfile was previously
    # imported mid-body, just before first use).
    import tempfile
    from unittest.mock import MagicMock, patch

    from mempalace.cli import _run_pass_zero
    from mempalace.corpus_origin import CorpusOriginResult, detect_origin_heuristic

    samples = _ai_dialogue_samples()
    # Compute the expected value from the real heuristic on the same samples,
    # so the test stays correct if the heuristic's scoring ever changes.
    expected_confidence = detect_origin_heuristic(samples).confidence

    fake_provider = MagicMock()
    # LLM picks a deliberately distinct confidence so any leak is visible.
    llm_distinct_result = CorpusOriginResult(
        likely_ai_dialogue=True,
        confidence=0.123456,
        primary_platform="Claude (Anthropic)",
        user_name=None,
        agent_persona_names=[],
        evidence=["LLM said yes with an unusual confidence"],
    )

    with tempfile.TemporaryDirectory() as tmp_dir:
        # Lay the samples out as markdown logs the pass-zero scan will pick up.
        project_dir = Path(tmp_dir) / "project"
        project_dir.mkdir()
        for i, sample in enumerate(samples):
            (project_dir / f"log{i}.md").write_text(sample)
        palace_dir = Path(tmp_dir) / "palace"

        with patch("mempalace.cli.detect_origin_llm", return_value=llm_distinct_result):
            wrapped = _run_pass_zero(
                project_dir=str(project_dir),
                palace_dir=str(palace_dir),
                llm_provider=fake_provider,
            )

        assert wrapped is not None
        res = wrapped["result"]
        assert res["confidence"] == expected_confidence, (
            f"Merged confidence {res['confidence']} did not match "
            f"detect_origin_heuristic's {expected_confidence}. Looks like "
            f"LLM's 0.123456 (or another source) leaked through the merge."
        )
16001689
16011690
16021691def test_merge_tier_fields_no_llm_provider_returns_heuristic_only ():
0 commit comments