Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions mempalace/backends/chroma.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import logging
import os
import sqlite3
from pathlib import Path
from typing import Any, Optional

import chromadb
Expand Down Expand Up @@ -159,6 +160,9 @@ def _pin_hnsw_threads(collection) -> None:
logger.debug("_pin_hnsw_threads modify failed", exc_info=True)


_BLOB_FIX_MARKER = ".blob_seq_ids_migrated"


def _fix_blob_seq_ids(palace_path: str) -> None:
"""Fix ChromaDB 0.6.x -> 1.5.x migration bug: BLOB seq_ids -> INTEGER.

Expand All @@ -168,10 +172,19 @@ def _fix_blob_seq_ids(palace_path: str) -> None:
type INTEGER) is not compatible with SQL type BLOB".

Must run BEFORE PersistentClient is created (the compactor fires on init).

Opening a Python sqlite3 connection against a ChromaDB 1.5.x WAL-mode
database leaves state that segfaults the next PersistentClient call. After
the migration has run once successfully, a marker file is written so
subsequent opens skip the sqlite connection entirely. Already-migrated
palaces can touch the marker manually to opt into the fast path.
"""
db_path = os.path.join(palace_path, "chroma.sqlite3")
if not os.path.isfile(db_path):
return
marker = os.path.join(palace_path, _BLOB_FIX_MARKER)
if os.path.isfile(marker):
return
Comment on lines +185 to +187
Copy link

Copilot AI Apr 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

New marker-file behavior isn’t covered by tests: there are existing tests for _fix_blob_seq_ids, but none assert (1) marker creation on successful no-op/migration, and (2) early return when the marker exists (ensuring sqlite3.connect() is not called). Adding/adjusting tests would prevent regressions on this reliability guard.

Copilot uses AI. Check for mistakes.
try:
with sqlite3.connect(db_path) as conn:
Comment on lines 188 to 189
Copy link

Copilot AI Apr 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

migrated is assigned but never read. This looks like leftover state from an earlier version; please remove it or use it (e.g., for conditional logging/marker handling) to avoid dead code and lint noise.

Copilot uses AI. Check for mistakes.
for table in ("embeddings", "max_seq_id"):
Expand All @@ -189,6 +202,14 @@ def _fix_blob_seq_ids(palace_path: str) -> None:
conn.commit()
except Exception:
logger.exception("Could not fix BLOB seq_ids in %s", db_path)
return
# Write marker whether or not rows needed migration — the palace is now
# confirmed to be in the INTEGER-seq_id state and future opens can skip the
# sqlite3.connect() entirely.
try:
Path(marker).touch()
except OSError:
logger.exception("Could not write migration marker %s", marker)


# ---------------------------------------------------------------------------
Expand Down
66 changes: 66 additions & 0 deletions tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,72 @@ def test_fix_blob_seq_ids_noop_without_database(tmp_path):
_fix_blob_seq_ids(str(tmp_path)) # should not raise


def test_fix_blob_seq_ids_writes_marker_after_blob_path(tmp_path):
"""The .blob_seq_ids_migrated marker is written after a successful BLOB → INTEGER conversion."""
from mempalace.backends.chroma import _BLOB_FIX_MARKER

db_path = tmp_path / "chroma.sqlite3"
conn = sqlite3.connect(str(db_path))
conn.execute("CREATE TABLE embeddings (rowid INTEGER PRIMARY KEY, seq_id)")
conn.execute("INSERT INTO embeddings (seq_id) VALUES (?)", ((42).to_bytes(8, "big"),))
conn.commit()
conn.close()

marker = tmp_path / _BLOB_FIX_MARKER
assert not marker.exists()

_fix_blob_seq_ids(str(tmp_path))

assert marker.is_file(), "marker must be written after a successful migration"


def test_fix_blob_seq_ids_writes_marker_when_already_integer(tmp_path):
"""The marker is written even when the migration is a no-op (already INTEGER).

The point of the marker is to skip the sqlite3 open on subsequent calls,
not to record that a conversion happened. So a clean palace gets the
marker on first run too — next ``_fix_blob_seq_ids`` call short-circuits
before touching the sqlite3 file.
"""
from mempalace.backends.chroma import _BLOB_FIX_MARKER

db_path = tmp_path / "chroma.sqlite3"
conn = sqlite3.connect(str(db_path))
conn.execute("CREATE TABLE embeddings (rowid INTEGER PRIMARY KEY, seq_id INTEGER)")
conn.execute("INSERT INTO embeddings (seq_id) VALUES (42)")
conn.commit()
conn.close()

marker = tmp_path / _BLOB_FIX_MARKER
assert not marker.exists()

_fix_blob_seq_ids(str(tmp_path))

assert marker.is_file(), "marker must be written even when no BLOBs found"


def test_fix_blob_seq_ids_skips_sqlite_when_marker_present(tmp_path):
"""When the marker exists, ``_fix_blob_seq_ids`` does not open sqlite3.

This is the load-bearing property of the marker — opening Python's
sqlite3 against a live ChromaDB 1.5.x WAL DB corrupts the next
PersistentClient call (#1090). Once a palace has been migrated, we
never want to open it again, even read-only.
"""
from unittest.mock import patch
from mempalace.backends.chroma import _BLOB_FIX_MARKER

# Pre-create the marker so the function should short-circuit.
db_path = tmp_path / "chroma.sqlite3"
db_path.write_bytes(b"sentinel") # presence required for the function to proceed
(tmp_path / _BLOB_FIX_MARKER).touch()

with patch("mempalace.backends.chroma.sqlite3.connect") as mock_connect:
_fix_blob_seq_ids(str(tmp_path))

mock_connect.assert_not_called()


# ── quarantine_stale_hnsw ─────────────────────────────────────────────────


Expand Down