Commit daedfb1

fix: serialize ChromaCollection writes via flock at the backend seam
Claude Code spawns one mcp_server.py process per open terminal; stop hooks spawn additional short-lived writers (diary writes, `mempalace mine` subprocesses). All open independent PersistentClient instances against the same palace directory. ChromaDB has no inter-process write locking — concurrent col.add/upsert/update/delete from N processes corrupts the HNSW segment, causing the next read to SIGSEGV in chromadb_rust_bindings.

Fix: `_palace_write_lock(palace_path)` — a contextmanager using fcntl.flock(LOCK_EX) on `$palace/.write.lock` — wraps all four write methods of ChromaCollection (add, upsert, update, delete). Because every caller (mcp_server, miner, convo_miner, palace) reaches ChromaDB through this adapter, all writes serialize automatically across processes. RFC 001 made ChromaCollection the single boundary for all ChromaDB writes, which is the correct home for concurrency control. No caller needs to know the lock exists.

ChromaCollection now accepts palace_path in its constructor; passed by ChromaBackend.get_collection and the legacy create_collection shim. Adapters constructed without a path (some tests) skip locking, same behavior as before. flock auto-releases on process death — a mid-write crash cannot deadlock future writers.

On Windows, fcntl is unavailable — yields without locking. Windows users running multiple MCP server processes against the same palace remain exposed to the underlying ChromaDB concurrency issue; palace-daemon, which provides proper asyncio read/write/mine semaphores, is the recommended path for multi-client setups on any platform.

Test plan: 1094 tests pass locally.
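
As an illustration of the mechanism, here is a standalone sketch of the flock pattern the fix relies on (POSIX only). The lock path, the sleep, and the writer function are invented for the demo; only the open/flock/unlock sequence mirrors the commit.

# Demo only: two processes contend for the same lock file and their
# "write sections" run one at a time. Paths and names are hypothetical.
import fcntl
import os
import time
from multiprocessing import Process

LOCK_PATH = "/tmp/palace-demo/.write.lock"  # invented path for the demo

def writer(tag: str) -> None:
    os.makedirs(os.path.dirname(LOCK_PATH), exist_ok=True)
    with open(LOCK_PATH, "a") as lf:
        # LOCK_EX blocks until the other process releases (or dies).
        fcntl.flock(lf.fileno(), fcntl.LOCK_EX)
        try:
            print(f"{tag}: entered write section")
            time.sleep(0.5)  # stand-in for a ChromaDB write
        finally:
            fcntl.flock(lf.fileno(), fcntl.LOCK_UN)

if __name__ == "__main__":
    procs = [Process(target=writer, args=(t,)) for t in ("A", "B")]
    for p in procs:
        p.start()
    for p in procs:
        p.join()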
1 parent 8ac98f0 commit daedfb1

2 files changed

Lines changed: 75 additions & 11 deletions


mempalace/backends/chroma.py

Lines changed: 69 additions & 9 deletions
@@ -4,10 +4,19 @@
 import logging
 import os
 import sqlite3
+from contextlib import contextmanager
 from typing import Any, Optional

 import chromadb

+try:
+    import fcntl as _fcntl
+
+    _HAS_FLOCK = True
+except ImportError:
+    _fcntl = None  # type: ignore[assignment]
+    _HAS_FLOCK = False  # Windows — no cross-process flock available
+
 from .base import (
     BaseBackend,
     BaseCollection,
@@ -177,14 +186,61 @@ def _as_list(v: Any) -> list:
     return [v]


+@contextmanager
+def _palace_write_lock(palace_path: Optional[str]):
+    """Cross-process exclusive write lock for ChromaDB writes.
+
+    Claude Code spawns one mcp_server.py per open terminal; stop hooks spawn
+    additional short-lived writers (diary writes, mine subprocesses). All open
+    independent PersistentClient instances against the same palace directory.
+    ChromaDB has no inter-process write locking — concurrent col.add/upsert/
+    update/delete from N processes corrupts the HNSW segment, causing the next
+    read to SIGSEGV in chromadb_rust_bindings.
+
+    Serializing all writes with flock(LOCK_EX) on a lock file in the palace
+    directory prevents the corruption. flock auto-releases on process death —
+    a mid-write crash cannot deadlock future writers.
+
+    On Windows, fcntl is unavailable — yields without locking. Windows users
+    running multiple MCP server processes remain exposed to the underlying
+    ChromaDB concurrency issue. palace-daemon, which provides proper asyncio
+    semaphores, is the recommended path for multi-client setups on any
+    platform.
+
+    palace_path may be None when the adapter is wrapping a collection whose
+    owning palace path isn't known (e.g. tests); in that case locking is
+    skipped.
+    """
+    if not _HAS_FLOCK or not palace_path:
+        yield
+        return
+    try:
+        os.makedirs(palace_path, exist_ok=True)
+    except OSError:
+        pass
+    lock_path = os.path.join(palace_path, ".write.lock")
+    with open(lock_path, "a") as _lf:
+        _fcntl.flock(_lf.fileno(), _fcntl.LOCK_EX)
+        try:
+            yield
+        finally:
+            _fcntl.flock(_lf.fileno(), _fcntl.LOCK_UN)
+
+
 class ChromaCollection(BaseCollection):
-    """Thin adapter translating ChromaDB dict returns into typed results."""
+    """Thin adapter translating ChromaDB dict returns into typed results.
+
+    Wraps all write methods (add/upsert/update/delete) in a cross-process
+    flock so concurrent MCP servers + mine subprocesses cannot corrupt the
+    HNSW segment by racing their writes.
+    """

-    def __init__(self, collection):
+    def __init__(self, collection, palace_path: Optional[str] = None):
         self._collection = collection
+        self._palace_path = palace_path

     # ------------------------------------------------------------------
-    # Writes
+    # Writes (serialized via cross-process flock on palace dir)
     # ------------------------------------------------------------------

     def add(self, *, documents, ids, metadatas=None, embeddings=None):
@@ -193,15 +249,17 @@ def add(self, *, documents, ids, metadatas=None, embeddings=None):
             kwargs["metadatas"] = metadatas
         if embeddings is not None:
             kwargs["embeddings"] = embeddings
-        self._collection.add(**kwargs)
+        with _palace_write_lock(self._palace_path):
+            self._collection.add(**kwargs)

     def upsert(self, *, documents, ids, metadatas=None, embeddings=None):
         kwargs: dict[str, Any] = {"documents": documents, "ids": ids}
         if metadatas is not None:
             kwargs["metadatas"] = metadatas
         if embeddings is not None:
             kwargs["embeddings"] = embeddings
-        self._collection.upsert(**kwargs)
+        with _palace_write_lock(self._palace_path):
+            self._collection.upsert(**kwargs)

     def update(
         self,
@@ -220,7 +278,8 @@ def update(
             kwargs["metadatas"] = metadatas
         if embeddings is not None:
             kwargs["embeddings"] = embeddings
-        self._collection.update(**kwargs)
+        with _palace_write_lock(self._palace_path):
+            self._collection.update(**kwargs)

     # ------------------------------------------------------------------
     # Reads
@@ -364,7 +423,8 @@ def delete(self, *, ids=None, where=None):
             kwargs["ids"] = ids
         if where is not None:
             kwargs["where"] = where
-        self._collection.delete(**kwargs)
+        with _palace_write_lock(self._palace_path):
+            self._collection.delete(**kwargs)

     def count(self):
         return self._collection.count()
@@ -539,7 +599,7 @@ def get_collection(
             )
         else:
             collection = client.get_collection(collection_name)
-        return ChromaCollection(collection)
+        return ChromaCollection(collection, palace_path=palace_path)

     def close_palace(self, palace) -> None:
         """Drop cached handles for ``palace``. Accepts ``PalaceRef`` or legacy path str."""
@@ -582,7 +642,7 @@ def create_collection(
         collection = self._client(palace_path).create_collection(
             collection_name, metadata={"hnsw:space": hnsw_space}
         )
-        return ChromaCollection(collection)
+        return ChromaCollection(collection, palace_path=palace_path)


 def _normalize_get_collection_args(args, kwargs):
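
For context, a sketch of how a writer reaches ChromaDB through the adapter after this change. The palace path, collection name, and document below are invented for illustration; ChromaCollection, its palace_path parameter, and the chromadb calls follow the diff above.

# Hypothetical caller; only ChromaCollection and palace_path come from this commit.
import chromadb
from mempalace.backends.chroma import ChromaCollection

palace_path = "/home/user/.palace"  # invented path
client = chromadb.PersistentClient(path=palace_path)
col = ChromaCollection(
    client.get_or_create_collection("memories", metadata={"hnsw:space": "cosine"}),
    palace_path=palace_path,
)

# This upsert takes the exclusive flock on $palace/.write.lock before calling
# into ChromaDB, so a concurrent mine subprocess writing to the same palace
# waits instead of racing the HNSW segment.
col.upsert(documents=["note"], ids=["n1"])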

mempalace/mcp_server.py

Lines changed: 6 additions & 2 deletions
@@ -220,12 +220,16 @@ def _get_collection(create=False):
         _collection_cache = ChromaCollection(
             client.get_or_create_collection(
                 _config.collection_name, metadata={"hnsw:space": "cosine"}
-            )
+            ),
+            palace_path=_config.palace_path,
         )
         _metadata_cache = None
         _metadata_cache_time = 0
     elif _collection_cache is None:
-        _collection_cache = ChromaCollection(client.get_collection(_config.collection_name))
+        _collection_cache = ChromaCollection(
+            client.get_collection(_config.collection_name),
+            palace_path=_config.palace_path,
+        )
         _metadata_cache = None
         _metadata_cache_time = 0
     return _collection_cache
