Skip to content

Commit 73541d1

Browse files
authored
Merge pull request #1262 from Legion345/fix/stop-hook-crash
fix(storage): stop ChromaDB from crashing when reopening an existing …
2 parents 96bb80a + d7f4638 commit 73541d1

2 files changed

Lines changed: 39 additions & 9 deletions

File tree

mempalace/backends/chroma.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from typing import Any, Optional
99

1010
import chromadb
11+
from chromadb.errors import NotFoundError as _ChromaNotFoundError
1112

1213
from .base import (
1314
BaseBackend,
@@ -1093,15 +1094,18 @@ def get_collection(
10931094
ef_kwargs = {"embedding_function": ef} if ef is not None else {}
10941095

10951096
if create:
1096-
collection = client.get_or_create_collection(
1097-
collection_name,
1098-
metadata={
1099-
"hnsw:space": hnsw_space,
1100-
"hnsw:num_threads": 1,
1101-
**_HNSW_BLOAT_GUARD,
1102-
},
1103-
**ef_kwargs,
1104-
)
1097+
try:
1098+
collection = client.get_collection(collection_name, **ef_kwargs)
1099+
except _ChromaNotFoundError:
1100+
collection = client.create_collection(
1101+
collection_name,
1102+
metadata={
1103+
"hnsw:space": hnsw_space,
1104+
"hnsw:num_threads": 1,
1105+
**_HNSW_BLOAT_GUARD,
1106+
},
1107+
**ef_kwargs,
1108+
)
11051109
else:
11061110
collection = client.get_collection(collection_name, **ef_kwargs)
11071111
_pin_hnsw_threads(collection)

tests/test_backends.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,32 @@ def test_chroma_backend_create_collection_sets_hnsw_bloat_guard(tmp_path):
372372
assert col.metadata.get("hnsw:sync_threshold") == 50_000
373373

374374

375+
def test_get_collection_create_true_is_idempotent(tmp_path):
    """Opening the same collection twice with ``create=True`` must succeed.

    Regression guard for issue #1089: ChromaDB 1.5.x's Rust bindings SIGSEGV
    when ``get_or_create_collection`` receives metadata that differs from what
    is already stored for the collection. The backend therefore tries
    ``get_collection`` first and only falls back to ``create_collection`` when
    the collection is missing, so the metadata-comparison codepath in
    ``chromadb_rust_bindings`` is never reached for existing collections.
    """
    palace_dir = str(tmp_path / "palace")
    chroma = ChromaBackend()
    open_kwargs = {"collection_name": "mempalace_drawers", "create": True}

    # First call creates the collection; the second reopens the existing one.
    chroma.get_collection(palace_dir, **open_kwargs)
    reopened = chroma.get_collection(palace_dir, **open_kwargs)

    assert isinstance(reopened, ChromaCollection)
389+
390+
391+
def test_get_collection_create_true_preserves_existing_metadata(tmp_path):
    """Reopening with ``create=True`` leaves the stored collection metadata intact."""
    palace_dir = str(tmp_path / "palace")
    chroma = ChromaBackend()
    open_kwargs = {"collection_name": "mempalace_drawers", "create": True}

    # Create the collection, then open it again through the same code path.
    chroma.get_collection(palace_dir, **open_kwargs)
    reopened = chroma.get_collection(palace_dir, **open_kwargs)

    # The metadata written at creation time must still be present on reopen.
    assert reopened._collection.metadata["hnsw:space"] == "cosine"
    assert reopened._collection.metadata.get("hnsw:batch_size") == 50_000
399+
400+
375401
def test_fix_blob_seq_ids_converts_blobs_to_integers(tmp_path):
376402
"""Simulate a ChromaDB 0.6.x database with BLOB seq_ids and verify repair."""
377403
db_path = tmp_path / "chroma.sqlite3"

0 commit comments

Comments
 (0)