diff --git a/mempalace/backends/chroma.py b/mempalace/backends/chroma.py index afd8083bb..21cabc960 100644 --- a/mempalace/backends/chroma.py +++ b/mempalace/backends/chroma.py @@ -7,6 +7,7 @@ from typing import Any, Optional import chromadb +from chromadb.config import Settings from .base import ( BaseBackend, @@ -20,6 +21,10 @@ _IncludeSpec, ) +#: Shared ChromaDB settings that silence the posthog telemetry spam +#: (see https://github.com/MemPalace/mempalace/issues/458). +CHROMA_SETTINGS = Settings(anonymized_telemetry=False) + logger = logging.getLogger(__name__) @@ -466,7 +471,7 @@ def _client(self, palace_path: str): if cached is None or inode_changed or mtime_changed or mtime_appeared: _fix_blob_seq_ids(palace_path) - cached = chromadb.PersistentClient(path=palace_path) + cached = chromadb.PersistentClient(path=palace_path, settings=CHROMA_SETTINGS) self._clients[palace_path] = cached # Re-stat after the client constructor runs: chromadb creates # chroma.sqlite3 lazily, so the stat captured before the call @@ -487,7 +492,7 @@ def make_client(palace_path: str): :meth:`get_collection` which manages caching internally. """ _fix_blob_seq_ids(palace_path) - return chromadb.PersistentClient(path=palace_path) + return chromadb.PersistentClient(path=palace_path, settings=CHROMA_SETTINGS) @staticmethod def backend_version() -> str: diff --git a/mempalace/dedup.py b/mempalace/dedup.py index 6b1bac106..24e9a03ef 100644 --- a/mempalace/dedup.py +++ b/mempalace/dedup.py @@ -29,6 +29,7 @@ from .backends.chroma import ChromaBackend +from .backends.chroma import CHROMA_SETTINGS COLLECTION_NAME = "mempalace_drawers" # Cosine DISTANCE threshold (not similarity). Lower = stricter. diff --git a/mempalace/repair.py b/mempalace/repair.py index 9a9aa8845..64538c04d 100644 --- a/mempalace/repair.py +++ b/mempalace/repair.py @@ -34,6 +34,7 @@ from .backends.chroma import ChromaBackend +from .backends.chroma import CHROMA_SETTINGS COLLECTION_NAME = "mempalace_drawers" diff --git a/tests/conftest.py b/tests/conftest.py index 7b2bb7713..7bc433728 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -30,6 +30,7 @@ import chromadb # noqa: E402 import pytest # noqa: E402 +from mempalace.backends.chroma import CHROMA_SETTINGS # noqa: E402 from mempalace.config import MempalaceConfig # noqa: E402 from mempalace.knowledge_graph import KnowledgeGraph # noqa: E402 @@ -100,7 +101,7 @@ def config(tmp_dir, palace_path): @pytest.fixture def collection(palace_path): """A ChromaDB collection pre-seeded in the temp palace.""" - client = chromadb.PersistentClient(path=palace_path) + client = chromadb.PersistentClient(path=palace_path, settings=CHROMA_SETTINGS) col = client.get_or_create_collection("mempalace_drawers", metadata={"hnsw:space": "cosine"}) yield col client.delete_collection("mempalace_drawers") diff --git a/tests/test_backends.py b/tests/test_backends.py index b3f009a25..af8703fe1 100644 --- a/tests/test_backends.py +++ b/tests/test_backends.py @@ -13,6 +13,7 @@ get_backend, ) from mempalace.backends.chroma import ( + CHROMA_SETTINGS, ChromaBackend, ChromaCollection, _fix_blob_seq_ids, @@ -257,7 +258,9 @@ def test_chroma_cache_picks_up_db_created_after_first_open(tmp_path): # Use a real chromadb call so _fix_blob_seq_ids and PersistentClient succeed. import chromadb as _chromadb - _chromadb.PersistentClient(path=str(palace_path)).get_or_create_collection("seed") + _chromadb.PersistentClient( + path=str(palace_path), settings=CHROMA_SETTINGS + ).get_or_create_collection("seed") assert (palace_path / "chroma.sqlite3").is_file() # Next _client() call must detect the 0 → nonzero transition and rebuild. @@ -316,7 +319,7 @@ def test_chroma_backend_create_true_creates_directory_and_collection(tmp_path): assert palace_path.is_dir() assert isinstance(collection, ChromaCollection) - client = chromadb.PersistentClient(path=str(palace_path)) + client = chromadb.PersistentClient(path=str(palace_path), settings=CHROMA_SETTINGS) client.get_collection("mempalace_drawers") @@ -329,7 +332,7 @@ def test_chroma_backend_creates_collection_with_cosine_distance(tmp_path): create=True, ) - client = chromadb.PersistentClient(path=str(palace_path)) + client = chromadb.PersistentClient(path=str(palace_path), settings=CHROMA_SETTINGS) col = client.get_collection("mempalace_drawers") assert col.metadata.get("hnsw:space") == "cosine" diff --git a/tests/test_convo_miner.py b/tests/test_convo_miner.py index 166644b00..2499fc6c7 100644 --- a/tests/test_convo_miner.py +++ b/tests/test_convo_miner.py @@ -5,6 +5,7 @@ import chromadb +from mempalace.backends.chroma import CHROMA_SETTINGS from mempalace.convo_miner import mine_convos from mempalace.palace import file_already_mined @@ -19,7 +20,7 @@ def test_convo_mining(): palace_path = os.path.join(tmpdir, "palace") mine_convos(tmpdir, palace_path, wing="test_convos") - client = chromadb.PersistentClient(path=palace_path) + client = chromadb.PersistentClient(settings=CHROMA_SETTINGS, path=palace_path) col = client.get_collection("mempalace_drawers") assert col.count() >= 2 @@ -46,7 +47,7 @@ def test_mine_convos_does_not_reprocess_short_files(capsys): # Verify sentinel was written (resolve path -- macOS /var -> /private/var) resolved_file = str(Path(tmpdir).resolve() / "tiny.txt") - client = chromadb.PersistentClient(path=palace_path) + client = chromadb.PersistentClient(settings=CHROMA_SETTINGS, path=palace_path) col = client.get_collection("mempalace_drawers") assert file_already_mined(col, resolved_file) @@ -100,7 +101,7 @@ def test_mine_convos_rebuilds_stale_drawers_after_schema_bump(capsys): mine_convos(tmpdir, palace_path, wing="test") capsys.readouterr() - client = chromadb.PersistentClient(path=palace_path) + client = chromadb.PersistentClient(settings=CHROMA_SETTINGS, path=palace_path) col = client.get_collection("mempalace_drawers") resolved = str(Path(tmpdir).resolve() / "chat.txt") first_pass = col.get(where={"source_file": resolved}) @@ -144,7 +145,7 @@ def test_mine_convos_rebuilds_stale_drawers_after_schema_bump(capsys): "Files skipped (already filed): 0" in out ), "stale drawers should force a rebuild, not a skip" - client = chromadb.PersistentClient(path=palace_path) + client = chromadb.PersistentClient(settings=CHROMA_SETTINGS, path=palace_path) col = client.get_collection("mempalace_drawers") rebuilt = col.get(where={"source_file": resolved}) # Orphan is gone diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py index 899e6a7c7..957f87ea8 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -28,8 +28,9 @@ def _get_collection(palace_path, create=False): when they are done. """ import chromadb + from mempalace.backends.chroma import CHROMA_SETTINGS - client = chromadb.PersistentClient(path=palace_path) + client = chromadb.PersistentClient(path=palace_path, settings=CHROMA_SETTINGS) if create: return ( client, @@ -220,10 +221,11 @@ def test_status_cold_start_no_collection(self, monkeypatch, config, palace_path, should return total_drawers: 0, not 'No palace found'. """ import chromadb + from mempalace.backends.chroma import CHROMA_SETTINGS _patch_mcp_server(monkeypatch, config, kg) # Create the DB file (init does this) but NOT the collection - client = chromadb.PersistentClient(path=palace_path) + client = chromadb.PersistentClient(path=palace_path, settings=CHROMA_SETTINGS) del client from mempalace.mcp_server import tool_status @@ -476,9 +478,9 @@ def test_add_drawer_shared_header_no_collision(self, monkeypatch, config, palace assert result1["success"] is True assert result2["success"] is True - assert ( - result1["drawer_id"] != result2["drawer_id"] - ), "Documents with shared header but different content must have distinct drawer IDs" + assert result1["drawer_id"] != result2["drawer_id"], ( + "Documents with shared header but different content must have distinct drawer IDs" + ) def test_delete_drawer(self, monkeypatch, config, palace_path, seeded_collection, kg): _patch_mcp_server(monkeypatch, config, kg) diff --git a/tests/test_miner.py b/tests/test_miner.py index 0c81dff7f..c136166b5 100644 --- a/tests/test_miner.py +++ b/tests/test_miner.py @@ -6,6 +6,8 @@ import chromadb import yaml +from mempalace.backends.chroma import CHROMA_SETTINGS + from mempalace.miner import load_config, mine, scan_project, status from mempalace.palace import NORMALIZE_VERSION, file_already_mined @@ -45,7 +47,7 @@ def test_project_mining(): palace_path = project_root / "palace" mine(str(project_root), str(palace_path)) - client = chromadb.PersistentClient(path=str(palace_path)) + client = chromadb.PersistentClient(settings=CHROMA_SETTINGS, path=str(palace_path)) col = client.get_collection("mempalace_drawers") assert col.count() > 0 finally: @@ -246,7 +248,7 @@ def test_file_already_mined_check_mtime(): try: palace_path = os.path.join(tmpdir, "palace") os.makedirs(palace_path) - client = chromadb.PersistentClient(path=palace_path) + client = chromadb.PersistentClient(settings=CHROMA_SETTINGS, path=palace_path) col = client.get_or_create_collection( "mempalace_drawers", metadata={"hnsw:space": "cosine"} ) @@ -386,7 +388,7 @@ def test_file_already_mined_returns_false_for_stale_normalize_version(): try: palace_path = os.path.join(tmpdir, "palace") os.makedirs(palace_path) - client = chromadb.PersistentClient(path=palace_path) + client = chromadb.PersistentClient(settings=CHROMA_SETTINGS, path=palace_path) col = client.get_or_create_collection("mempalace_drawers") # Pre-v2 drawer: no normalize_version field at all @@ -428,7 +430,7 @@ def test_add_drawer_stamps_normalize_version(tmp_path): palace_path = tmp_path / "palace" palace_path.mkdir() - client = chromadb.PersistentClient(path=str(palace_path)) + client = chromadb.PersistentClient(settings=CHROMA_SETTINGS, path=str(palace_path)) col = client.get_or_create_collection("mempalace_drawers") try: added = add_drawer(