Skip to content

Commit 6adc734

Browse files
author
Scorpion
committed
fix(chroma): skip _fix_blob_seq_ids sqlite open on migrated palaces via marker (upstream MemPalace#1177)
Adds _pin_hnsw_threads helper + _BLOB_FIX_MARKER guard to avoid WAL-state segfault on already-migrated palaces.
1 parent febb5a1 commit 6adc734

3 files changed

Lines changed: 134 additions & 1 deletion

File tree

mempalace/backends/chroma.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import os
66
import sqlite3
77
import threading
8+
from pathlib import Path
89
from typing import Any, Optional
910

1011
import chromadb
@@ -144,6 +145,38 @@ def quarantine_stale_hnsw(palace_path: str, stale_seconds: float = 3600.0) -> li
144145
return moved
145146

146147

148+
def _pin_hnsw_threads(collection) -> None:
149+
"""Best-effort retrofit: pin ``hnsw:num_threads=1`` on an existing collection.
150+
151+
Fresh collections set this via ``metadata=`` at creation. Legacy palaces
152+
built before that change keep the default (parallel insert) and can hit
153+
the HNSW race described in #974/#965. ChromaDB's
154+
``collection.modify(configuration=...)`` lets us re-apply ``num_threads=1``
155+
in memory at load time so every new process is protected.
156+
157+
Note: in chromadb 1.5.x the modified ``configuration_json["hnsw"]`` does
158+
not persist to disk across ``PersistentClient`` reopens, so this must
159+
run on every ``get_collection`` call, not just once.
160+
"""
161+
try:
162+
from chromadb.api.collection_configuration import (
163+
UpdateCollectionConfiguration,
164+
UpdateHNSWConfiguration,
165+
)
166+
except ImportError:
167+
logger.debug("_pin_hnsw_threads skipped: chromadb too old", exc_info=True)
168+
return
169+
try:
170+
collection.modify(
171+
configuration=UpdateCollectionConfiguration(hnsw=UpdateHNSWConfiguration(num_threads=1))
172+
)
173+
except Exception:
174+
logger.debug("_pin_hnsw_threads modify failed", exc_info=True)
175+
176+
177+
_BLOB_FIX_MARKER = ".blob_seq_ids_migrated"
178+
179+
147180
def _fix_blob_seq_ids(palace_path: str) -> None:
148181
"""Fix ChromaDB 0.6.x -> 1.5.x migration bug: BLOB seq_ids -> INTEGER.
149182
@@ -153,10 +186,19 @@ def _fix_blob_seq_ids(palace_path: str) -> None:
153186
type INTEGER) is not compatible with SQL type BLOB".
154187
155188
Must run BEFORE PersistentClient is created (the compactor fires on init).
189+
190+
Opening a Python sqlite3 connection against a ChromaDB 1.5.x WAL-mode
191+
database leaves state that segfaults the next PersistentClient call. After
192+
the migration has run once successfully, a marker file is written so
193+
subsequent opens skip the sqlite connection entirely. Already-migrated
194+
palaces can touch the marker manually to opt into the fast path.
156195
"""
157196
db_path = os.path.join(palace_path, "chroma.sqlite3")
158197
if not os.path.isfile(db_path):
159198
return
199+
marker = os.path.join(palace_path, _BLOB_FIX_MARKER)
200+
if os.path.isfile(marker):
201+
return
160202
try:
161203
with sqlite3.connect(db_path) as conn:
162204
for table in ("embeddings", "max_seq_id"):
@@ -174,6 +216,14 @@ def _fix_blob_seq_ids(palace_path: str) -> None:
174216
conn.commit()
175217
except Exception:
176218
logger.exception("Could not fix BLOB seq_ids in %s", db_path)
219+
return
220+
# Write marker whether or not rows needed migration — the palace is now
221+
# confirmed to be in the INTEGER-seq_id state and future opens can skip the
222+
# sqlite3.connect() entirely.
223+
try:
224+
Path(marker).touch()
225+
except OSError:
226+
logger.exception("Could not write migration marker %s", marker)
177227

178228

179229
# ---------------------------------------------------------------------------

tests/test_backends.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -442,6 +442,72 @@ def test_fix_blob_seq_ids_noop_without_database(tmp_path):
442442
_fix_blob_seq_ids(str(tmp_path)) # should not raise
443443

444444

445+
def test_fix_blob_seq_ids_writes_marker_after_blob_path(tmp_path):
446+
"""The .blob_seq_ids_migrated marker is written after a successful BLOB → INTEGER conversion."""
447+
from mempalace.backends.chroma import _BLOB_FIX_MARKER
448+
449+
db_path = tmp_path / "chroma.sqlite3"
450+
conn = sqlite3.connect(str(db_path))
451+
conn.execute("CREATE TABLE embeddings (rowid INTEGER PRIMARY KEY, seq_id)")
452+
conn.execute("INSERT INTO embeddings (seq_id) VALUES (?)", ((42).to_bytes(8, "big"),))
453+
conn.commit()
454+
conn.close()
455+
456+
marker = tmp_path / _BLOB_FIX_MARKER
457+
assert not marker.exists()
458+
459+
_fix_blob_seq_ids(str(tmp_path))
460+
461+
assert marker.is_file(), "marker must be written after a successful migration"
462+
463+
464+
def test_fix_blob_seq_ids_writes_marker_when_already_integer(tmp_path):
465+
"""The marker is written even when the migration is a no-op (already INTEGER).
466+
467+
The point of the marker is to skip the sqlite3 open on subsequent calls,
468+
not to record that a conversion happened. So a clean palace gets the
469+
marker on first run too — next ``_fix_blob_seq_ids`` call short-circuits
470+
before touching the sqlite3 file.
471+
"""
472+
from mempalace.backends.chroma import _BLOB_FIX_MARKER
473+
474+
db_path = tmp_path / "chroma.sqlite3"
475+
conn = sqlite3.connect(str(db_path))
476+
conn.execute("CREATE TABLE embeddings (rowid INTEGER PRIMARY KEY, seq_id INTEGER)")
477+
conn.execute("INSERT INTO embeddings (seq_id) VALUES (42)")
478+
conn.commit()
479+
conn.close()
480+
481+
marker = tmp_path / _BLOB_FIX_MARKER
482+
assert not marker.exists()
483+
484+
_fix_blob_seq_ids(str(tmp_path))
485+
486+
assert marker.is_file(), "marker must be written even when no BLOBs found"
487+
488+
489+
def test_fix_blob_seq_ids_skips_sqlite_when_marker_present(tmp_path):
490+
"""When the marker exists, ``_fix_blob_seq_ids`` does not open sqlite3.
491+
492+
This is the load-bearing property of the marker — opening Python's
493+
sqlite3 against a live ChromaDB 1.5.x WAL DB corrupts the next
494+
PersistentClient call (#1090). Once a palace has been migrated, we
495+
never want to open it again, even read-only.
496+
"""
497+
from unittest.mock import patch
498+
from mempalace.backends.chroma import _BLOB_FIX_MARKER
499+
500+
# Pre-create the marker so the function should short-circuit.
501+
db_path = tmp_path / "chroma.sqlite3"
502+
db_path.write_bytes(b"sentinel") # presence required for the function to proceed
503+
(tmp_path / _BLOB_FIX_MARKER).touch()
504+
505+
with patch("mempalace.backends.chroma.sqlite3.connect") as mock_connect:
506+
_fix_blob_seq_ids(str(tmp_path))
507+
508+
mock_connect.assert_not_called()
509+
510+
445511
# ── quarantine_stale_hnsw ─────────────────────────────────────────────────
446512

447513

tests/test_palace_graph_tunnels.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -293,8 +293,25 @@ def test_skips_when_saved_pairs_empty(self, tmp_path, monkeypatch):
293293
},
294294
)
295295
assert palace_graph.auto_link_shared_rooms([], col=MagicMock()) == []
296+
297+
298+
class TestTopicTunnelKind:
299+
"""Cross-wing topic tunnels are tagged with kind='topic' to distinguish
300+
them from explicit folder-derived tunnels (upstream #1184).
301+
302+
Skipped on our fork until compute_topic_tunnels is implemented.
303+
"""
304+
305+
def test_topic_tunnel_kind_label(self, tmp_path, monkeypatch):
306+
if not hasattr(palace_graph, "compute_topic_tunnels"):
307+
import pytest
308+
309+
pytest.skip("compute_topic_tunnels not implemented in this fork")
310+
_use_tmp_tunnel_file(monkeypatch, tmp_path)
296311
palace_graph.create_tunnel("wing_a", "auth", "wing_b", "users", label="x")
297-
palace_graph.compute_topic_tunnels({"wing_a": ["Redis"], "wing_b": ["Redis"]}, min_count=1)
312+
palace_graph.compute_topic_tunnels(
313+
{"wing_a": ["Redis"], "wing_b": ["Redis"]}, min_count=1
314+
)
298315

299316
tunnels = palace_graph.list_tunnels()
300317
kinds = sorted(t["kind"] for t in tunnels)

0 commit comments

Comments
 (0)