Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions mempalace/layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

from .config import MempalaceConfig
from .palace import get_collection as _get_collection
from .searcher import build_where_filter
from .searcher import _first_or_empty, build_where_filter


# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -272,9 +272,9 @@ def search(self, query: str, wing: str = None, room: str = None, n_results: int
except Exception as e:
return f"Search error: {e}"

docs = results["documents"][0]
metas = results["metadatas"][0]
dists = results["distances"][0]
docs = _first_or_empty(results, "documents")
metas = _first_or_empty(results, "metadatas")
dists = _first_or_empty(results, "distances")

if not docs:
return "No results found."
Expand Down Expand Up @@ -323,9 +323,9 @@ def search_raw(

hits = []
for doc, meta, dist in zip(
results["documents"][0],
results["metadatas"][0],
results["distances"][0],
_first_or_empty(results, "documents"),
_first_or_empty(results, "metadatas"),
_first_or_empty(results, "distances"),
):
hits.append(
{
Expand Down
34 changes: 24 additions & 10 deletions mempalace/searcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,20 @@ class SearchError(Exception):
_TOKEN_RE = re.compile(r"\w{2,}", re.UNICODE)


def _first_or_empty(results: dict, key: str) -> list:
"""Return the first inner list of a ChromaDB query result, or [].

ChromaDB returns shapes like ``{"documents": [["a", "b"]], ...}`` for a
successful query, but ``{"documents": [], ...}`` (empty outer list) when
the collection is empty or the filter excludes everything. Indexing
``[0]`` blindly raises IndexError in that case (issue #195).
"""
outer = results.get(key)
if not outer:
return []
return outer[0] or []


def _tokenize(text: str) -> list:
"""Lowercase + strip to alphanumeric tokens of length ≥ 2."""
return _TOKEN_RE.findall(text.lower())
Expand Down Expand Up @@ -251,9 +265,9 @@ def search(query: str, palace_path: str, wing: str = None, room: str = None, n_r
print(f"\n Search error: {e}")
raise SearchError(f"Search error: {e}") from e

docs = results["documents"][0]
metas = results["metadatas"][0]
dists = results["distances"][0]
docs = _first_or_empty(results, "documents")
metas = _first_or_empty(results, "metadatas")
dists = _first_or_empty(results, "distances")

if not docs:
print(f'\n No results found for: "{query}"')
Expand Down Expand Up @@ -353,9 +367,9 @@ def search_memories(
closet_results = closets_col.query(**ckwargs)
for rank, (cdoc, cmeta, cdist) in enumerate(
zip(
closet_results["documents"][0],
closet_results["metadatas"][0],
closet_results["distances"][0],
_first_or_empty(closet_results, "documents"),
_first_or_empty(closet_results, "metadatas"),
_first_or_empty(closet_results, "distances"),
)
):
source = cmeta.get("source_file", "")
Expand All @@ -372,9 +386,9 @@ def search_memories(

scored: list = []
for doc, meta, dist in zip(
drawer_results["documents"][0],
drawer_results["metadatas"][0],
drawer_results["distances"][0],
_first_or_empty(drawer_results, "documents"),
_first_or_empty(drawer_results, "metadatas"),
_first_or_empty(drawer_results, "distances"),
):
# Filter on raw distance before rounding to avoid precision loss.
if max_distance > 0.0 and dist > max_distance:
Expand Down Expand Up @@ -482,6 +496,6 @@ def search_memories(
return {
"query": query,
"filters": {"wing": wing, "room": room},
"total_before_filter": len(drawer_results["documents"][0]),
"total_before_filter": len(_first_or_empty(drawer_results, "documents")),
"results": hits,
}
48 changes: 48 additions & 0 deletions tests/test_empty_chromadb_results.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
"""Regression tests for issue #195 — IndexError on empty ChromaDB results.

Before the fix, `searcher.search()`, `searcher.search_memories()`, and
`Layer3.search()` indexed `results["documents"][0]` without checking the
outer list, so a query against an empty collection (or a wing/room
filter that excluded everything) crashed with IndexError instead of
returning a graceful "no results" response.
"""

import pytest

from mempalace.searcher import _first_or_empty


def test_first_or_empty_handles_empty_outer_list():
"""The shape ChromaDB returns from an empty collection (issue #195)."""
results = {"documents": [], "metadatas": [], "distances": []}
assert _first_or_empty(results, "documents") == []
assert _first_or_empty(results, "metadatas") == []
assert _first_or_empty(results, "distances") == []


def test_first_or_empty_handles_outer_with_empty_inner():
"""ChromaDB also returns ``{"documents": [[]]}`` in some versions —
must yield [] either way."""
assert _first_or_empty({"documents": [[]]}, "documents") == []


def test_first_or_empty_handles_missing_key():
assert _first_or_empty({}, "documents") == []


def test_first_or_empty_handles_none_inner():
"""``[None]`` (unusual but observed) must not blow up."""
assert _first_or_empty({"documents": [None]}, "documents") == []


def test_first_or_empty_returns_inner_list_for_normal_result():
results = {"documents": [["a", "b", "c"]]}
assert _first_or_empty(results, "documents") == ["a", "b", "c"]


def test_raw_indexing_still_raises_to_document_the_bug():
"""Document the original failure mode so future readers understand
why _first_or_empty exists."""
results = {"documents": []}
with pytest.raises(IndexError):
_ = results["documents"][0]