Skip to content

Commit ba9220b

Browse files
committed
Refactor internal filter evaluation in SimpleVectorStore
- Internal implementation improvements - Update documentation - Adjust class visibility Signed-off-by: Christian Tzolov <christian.tzolov@broadcom.com>
1 parent 32c79b0 commit ba9220b

8 files changed

Lines changed: 528 additions & 423 deletions

File tree

spring-ai-docs/src/main/antora/modules/ROOT/pages/api/vectordbs.adoc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -398,7 +398,9 @@ These are the available implementations of the `VectorStore` interface:
398398
* xref:api/vectordbs/typesense.adoc[Typesense Vector Store] - The https://typesense.org/docs/0.24.0/api/vector-search.html[Typesense] vector store.
399399
* xref:api/vectordbs/weaviate.adoc[Weaviate Vector Store] - The https://weaviate.io/[Weaviate] vector store.
400400
* xref:api/vectordbs/s3-vector-store.adoc[S3 Vector Store] - The https://aws.amazon.com/s3/features/vectors/[AWS S3] vector store.
401-
* link:https://github.com/spring-projects/spring-ai/blob/main/spring-ai-vector-store/src/main/java/org/springframework/ai/vectorstore/SimpleVectorStore.java[SimpleVectorStore] - A simple implementation of persistent vector storage, good for educational purposes.
401+
* link:https://github.com/spring-projects/spring-ai/blob/main/spring-ai-vector-store/src/main/java/org/springframework/ai/vectorstore/SimpleVectorStore.java[SimpleVectorStore] - A simple implementation of a vector store, good only for testing or demonstration purposes.
402+
403+
WARNING: The `SimpleVectorStore` implementation is not designed for production use and should only be used for testing or demonstration purposes.
402404

403405
More implementations may be supported in future releases.
404406

spring-ai-docs/src/main/antora/modules/ROOT/pages/api/vectordbs/understand-vectordbs.adoc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,5 +91,5 @@ It expands the two-dimensional definitions of Magnitude and Dot Product given pr
9191
stem:[similarity(\vec{A},\vec{B}) = \cos(\theta) = \frac{ \sum_{i=1}^{n} {A_i B_i} }{ \sqrt{\sum_{i=1}^{n}{A_i^2} \cdot \sum_{i=1}^{n}{B_i^2}} }]
9292
****
9393

94-
This is the key formula used in the simple implementation of a vector store and can be found in the `SimpleVectorStore` implementation.
94+
This is the key formula used in the simple implementation of a vector store and can be found in the `SimpleVectorStore` implementation, which is intended for testing and demonstration purposes only.
9595

spring-ai-vector-store/src/main/java/org/springframework/ai/vectorstore/SimpleVectorStore.java

Lines changed: 25 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -47,24 +47,34 @@
4747
import org.springframework.ai.observation.conventions.VectorStoreSimilarityMetric;
4848
import org.springframework.ai.util.JacksonUtils;
4949
import org.springframework.ai.vectorstore.filter.Filter;
50-
import org.springframework.ai.vectorstore.filter.FilterExpressionConverter;
51-
import org.springframework.ai.vectorstore.filter.converter.SimpleVectorStoreFilterExpressionConverter;
5250
import org.springframework.ai.vectorstore.observation.AbstractObservationVectorStore;
5351
import org.springframework.ai.vectorstore.observation.VectorStoreObservationContext;
5452
import org.springframework.core.io.Resource;
55-
import org.springframework.expression.ExpressionParser;
56-
import org.springframework.expression.spel.standard.SpelExpressionParser;
57-
import org.springframework.expression.spel.support.StandardEvaluationContext;
5853

5954
/**
60-
* SimpleVectorStore is a simple implementation of the VectorStore interface.
55+
* A simple, in-memory implementation of the <a href=
56+
* "https://docs.spring.io/spring-ai/reference/api/vectordbs.html#_understanding_vectors">VectorStore</a>
57+
* interface.
6158
*
62-
* It also provides methods to save the current state of the vectors to a file, and to
63-
* load vectors from a file.
59+
* <p>
60+
* Uses a {@link java.util.concurrent.ConcurrentHashMap} to store vectors and their
61+
* associated metadata. Map keys are document IDs; values are
62+
* {@link SimpleVectorStoreContent} instances that encapsulate each document's text,
63+
* metadata, and embedding vector.
6464
*
65-
* For a deeper understanding of the mathematical concepts and computations involved in
66-
* calculating similarity scores among vectors, refer to this
67-
* [resource](https://docs.spring.io/spring-ai/reference/api/vectordbs.html#_understanding_vectors).
65+
* <p>
66+
* Similarity search is performed using cosine similarity over all stored vectors. Filter
67+
* expressions on document metadata are evaluated via
68+
* {@link SimpleVectorStoreFilterExpressionEvaluator}.
69+
*
70+
* <p>
71+
* The store can be persisted to and restored from a JSON file via the
72+
* {@link #save(java.io.File)} and {@link #load(java.io.File)} /
73+
* {@link #load(org.springframework.core.io.Resource)} methods.
74+
*
75+
* <p>
76+
* <b>NOTE</b>: This implementation is not designed for production use and should only be
77+
* used for testing or demonstration purposes.
6878
*
6979
* @author Raphael Yu
7080
* @author Dingmeng Xue
@@ -75,24 +85,22 @@
7585
* @author Thomas Vitale
7686
* @author Jemin Huh
7787
* @author David Yu
88+
* @since 1.0.0
7889
*/
7990
public class SimpleVectorStore extends AbstractObservationVectorStore {
8091

8192
private static final Logger logger = LoggerFactory.getLogger(SimpleVectorStore.class);
8293

8394
private final JsonMapper jsonMapper;
8495

85-
private final ExpressionParser expressionParser;
86-
87-
private final FilterExpressionConverter filterExpressionConverter;
96+
private final SimpleVectorStoreFilterExpressionEvaluator filterExpressionEvaluator;
8897

8998
protected Map<String, SimpleVectorStoreContent> store = new ConcurrentHashMap<>();
9099

91100
protected SimpleVectorStore(SimpleVectorStoreBuilder builder) {
92101
super(builder);
93102
this.jsonMapper = JsonMapper.builder().addModules(JacksonUtils.instantiateAvailableModules()).build();
94-
this.expressionParser = new SpelExpressionParser();
95-
this.filterExpressionConverter = new SimpleVectorStoreFilterExpressionConverter();
103+
this.filterExpressionEvaluator = new SimpleVectorStoreFilterExpressionEvaluator();
96104
}
97105

98106
/**
@@ -154,14 +162,7 @@ private Predicate<SimpleVectorStoreContent> doFilterPredicate(Filter.@Nullable E
154162
if (filterExpression == null) {
155163
return document -> true;
156164
}
157-
158-
return document -> {
159-
StandardEvaluationContext context = new StandardEvaluationContext();
160-
context.setVariable("metadata", document.getMetadata());
161-
return Boolean.TRUE.equals(this.expressionParser
162-
.parseExpression(this.filterExpressionConverter.convertExpression(filterExpression))
163-
.getValue(context, Boolean.class));
164-
};
165+
return document -> this.filterExpressionEvaluator.evaluate(filterExpression, document.getMetadata());
165166
}
166167

167168
/**

spring-ai-vector-store/src/main/java/org/springframework/ai/vectorstore/SimpleVectorStoreContent.java

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
* embeddings. This class is thread-safe and all its fields are final and deeply
3939
* immutable. The embedding vector is required for all instances of this class.
4040
*/
41-
public final class SimpleVectorStoreContent implements Content {
41+
final class SimpleVectorStoreContent implements Content {
4242

4343
private final String id;
4444

@@ -54,7 +54,7 @@ public final class SimpleVectorStoreContent implements Content {
5454
* @param text the content text, must not be null
5555
* @param embedding the embedding vector, must not be null
5656
*/
57-
public SimpleVectorStoreContent(@JsonProperty("text") @JsonAlias("content") String text,
57+
SimpleVectorStoreContent(@JsonProperty("text") @JsonAlias("content") String text,
5858
@JsonProperty("embedding") float[] embedding) {
5959
this(text, new HashMap<>(), embedding);
6060
}
@@ -65,7 +65,7 @@ public SimpleVectorStoreContent(@JsonProperty("text") @JsonAlias("content") Stri
6565
* @param metadata the metadata map, must not be null
6666
* @param embedding the embedding vector, must not be null
6767
*/
68-
public SimpleVectorStoreContent(String text, Map<String, Object> metadata, float[] embedding) {
68+
SimpleVectorStoreContent(String text, Map<String, Object> metadata, float[] embedding) {
6969
this(text, metadata, new RandomIdGenerator(), embedding);
7070
}
7171

@@ -77,8 +77,7 @@ public SimpleVectorStoreContent(String text, Map<String, Object> metadata, float
7777
* @param idGenerator the ID generator to use, must not be null
7878
* @param embedding the embedding vector, must not be null
7979
*/
80-
public SimpleVectorStoreContent(String text, Map<String, Object> metadata, IdGenerator idGenerator,
81-
float[] embedding) {
80+
SimpleVectorStoreContent(String text, Map<String, Object> metadata, IdGenerator idGenerator, float[] embedding) {
8281
this(idGenerator.generateId(text, metadata), text, metadata, embedding);
8382
}
8483

@@ -91,7 +90,7 @@ public SimpleVectorStoreContent(String text, Map<String, Object> metadata, IdGen
9190
* @throws IllegalArgumentException if any parameter is null or if id is empty
9291
*/
9392
@JsonCreator(mode = JsonCreator.Mode.PROPERTIES)
94-
public SimpleVectorStoreContent(@JsonProperty("id") @Nullable String id,
93+
SimpleVectorStoreContent(@JsonProperty("id") @Nullable String id,
9594
@JsonProperty("text") @JsonAlias("content") String text,
9695
@JsonProperty("metadata") Map<String, Object> metadata, @JsonProperty("embedding") float[] embedding) {
9796

Lines changed: 221 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,221 @@
1+
/*
2+
* Copyright 2023-present the original author or authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.springframework.ai.vectorstore;
18+
19+
import java.time.ZoneOffset;
20+
import java.time.format.DateTimeFormatter;
21+
import java.util.Date;
22+
import java.util.List;
23+
import java.util.Map;
24+
import java.util.Objects;
25+
26+
import org.jspecify.annotations.Nullable;
27+
28+
import org.springframework.ai.vectorstore.filter.Filter;
29+
import org.springframework.ai.vectorstore.filter.FilterExpressionBuilder;
30+
31+
/**
32+
* Internal helper used by {@link SimpleVectorStore} to evaluate a
33+
* {@link Filter.Expression} AST directly against a document metadata map, without
34+
* converting to an intermediate string representation (e.g. SpEL or SQL).
35+
*
36+
* <p>
37+
* Supports all {@link Filter.ExpressionType} operations:
38+
* <ul>
39+
* <li>Logical: {@code AND}, {@code OR}, {@code NOT}</li>
40+
* <li>Comparison: {@code EQ}, {@code NE}, {@code GT}, {@code GTE}, {@code LT},
41+
* {@code LTE}</li>
42+
* <li>Collection: {@code IN}, {@code NIN}</li>
43+
* <li>Null checks: {@code ISNULL}, {@code ISNOTNULL}</li>
44+
* </ul>
45+
*
46+
* <p>
47+
* <b>Type handling:</b> Numbers are promoted to {@code double} so that mixed
48+
* {@code Integer}/{@code Long}/{@code Double} metadata values compare correctly.
49+
* {@link Date} filter values are normalised to their ISO-8601 UTC string representation
50+
* (matching the format used to store dates in metadata).
51+
*
52+
* <p>
53+
* <b>Missing-key semantics:</b> A metadata key that is absent is treated as {@code null}.
54+
* Null ordering follows SQL {@code NULLS FIRST} semantics — {@code null} is considered
55+
* less than any non-null value. Consequently, ordered comparisons ({@code GT},
56+
* {@code GTE}, {@code LT}, {@code LTE}) against a missing key evaluate as if that key
57+
* holds the smallest possible value (e.g. {@code year > 2020} returns {@code false} when
58+
* {@code year} is absent).
59+
*
60+
* @author Christian Tzolov
61+
*/
62+
final class SimpleVectorStoreFilterExpressionEvaluator {
63+
64+
// 'Z' is intentionally a literal suffix, not the offset pattern 'X'. Combined with
65+
// withZone(UTC) this always produces the fixed form "yyyy-MM-dd'T'HH:mm:ss'Z'",
66+
// matching the format used to store Date values in document metadata.
67+
private static final DateTimeFormatter DATE_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'")
68+
.withZone(ZoneOffset.UTC);
69+
70+
/**
71+
* Evaluates the given filter expression against the provided metadata map.
72+
* @param expression the filter expression to evaluate; must not be {@code null}
73+
* @param metadata the document metadata to match against; must not be {@code null}
74+
* @return {@code true} if the metadata satisfies the expression
75+
*/
76+
public boolean evaluate(Filter.Expression expression, Map<String, Object> metadata) {
77+
return evaluateExpression(expression, metadata);
78+
}
79+
80+
private boolean evaluateOperand(Filter.Operand operand, Map<String, Object> metadata) {
81+
if (operand instanceof Filter.Group group) {
82+
return evaluateOperand(group.content(), metadata);
83+
}
84+
if (operand instanceof Filter.Expression expression) {
85+
return evaluateExpression(expression, metadata);
86+
}
87+
// Filter.Key and Filter.Value are leaf operands consumed directly by
88+
// metadataValue() and filterValue() inside evaluateExpression(). They are never
89+
// passed here as top-level boolean operands, so this branch is unreachable under
90+
// normal usage.
91+
throw new IllegalArgumentException("Unsupported operand type: " + operand.getClass().getName());
92+
}
93+
94+
private boolean evaluateExpression(Filter.Expression expression, Map<String, Object> metadata) {
95+
return switch (expression.type()) {
96+
case AND -> evaluateOperand(left(expression), metadata) && evaluateOperand(right(expression), metadata);
97+
case OR -> evaluateOperand(left(expression), metadata) || evaluateOperand(right(expression), metadata);
98+
// Unary operator: only the left operand is used. Ignore right operand
99+
case NOT -> !evaluateOperand(left(expression), metadata);
100+
case EQ -> compare(metadataValue(left(expression), metadata), filterValue(right(expression))) == 0;
101+
case NE -> compare(metadataValue(left(expression), metadata), filterValue(right(expression))) != 0;
102+
case GT -> compare(metadataValue(left(expression), metadata), filterValue(right(expression))) > 0;
103+
case GTE -> compare(metadataValue(left(expression), metadata), filterValue(right(expression))) >= 0;
104+
case LT -> compare(metadataValue(left(expression), metadata), filterValue(right(expression))) < 0;
105+
case LTE -> compare(metadataValue(left(expression), metadata), filterValue(right(expression))) <= 0;
106+
case IN -> {
107+
Object metaVal = metadataValue(left(expression), metadata);
108+
List<?> list = asList(filterValue(right(expression)), expression);
109+
yield list.stream().anyMatch(item -> compare(metaVal, item) == 0);
110+
}
111+
case NIN -> {
112+
Object metaVal = metadataValue(left(expression), metadata);
113+
List<?> list = asList(filterValue(right(expression)), expression);
114+
yield list.stream().noneMatch(item -> compare(metaVal, item) == 0);
115+
}
116+
// Unary operators: only the left operand (the key) is used.
117+
// A non-null right operand is silently ignored here.
118+
case ISNULL -> metadataValue(left(expression), metadata) == null;
119+
case ISNOTNULL -> metadataValue(left(expression), metadata) != null;
120+
};
121+
}
122+
123+
private Filter.Operand left(Filter.Expression expression) {
124+
Filter.Operand left = expression.left();
125+
if (left == null) {
126+
throw new IllegalArgumentException(
127+
"Expression of type %s requires a left operand".formatted(expression.type()));
128+
}
129+
return left;
130+
}
131+
132+
private Filter.Operand right(Filter.Expression expression) {
133+
Filter.Operand right = expression.right();
134+
if (right == null) {
135+
throw new IllegalArgumentException(
136+
"Expression of type %s requires a right operand".formatted(expression.type()));
137+
}
138+
return right;
139+
}
140+
141+
/**
142+
* Extracts the metadata value for the given {@link Filter.Key} operand. Outer quotes
143+
* ({@code "..."} or {@code '...'}) are stripped from the key name to match the format
144+
* used by {@link FilterExpressionBuilder} and the text parser.
145+
*/
146+
private @Nullable Object metadataValue(Filter.Operand operand, Map<String, Object> metadata) {
147+
if (operand instanceof Filter.Key key) {
148+
String k = key.key();
149+
if (k.length() >= 2
150+
&& ((k.startsWith("\"") && k.endsWith("\"")) || (k.startsWith("'") && k.endsWith("'")))) {
151+
k = k.substring(1, k.length() - 1);
152+
}
153+
return metadata.get(k);
154+
}
155+
throw new IllegalArgumentException("Expected a Key operand but got: " + operand.getClass().getName());
156+
}
157+
158+
/**
159+
* Extracts the constant value from a {@link Filter.Value} operand. {@link Date}
160+
* instances are formatted to their ISO-8601 UTC string so they can be compared
161+
* directly with metadata strings stored in the same format.
162+
*/
163+
private Object filterValue(Filter.Operand operand) {
164+
if (operand instanceof Filter.Value filterValue) {
165+
Object value = filterValue.value();
166+
return (value instanceof Date date) ? DATE_FORMATTER.format(date.toInstant()) : value;
167+
}
168+
throw new IllegalArgumentException("Expected a Value operand but got: " + operand.getClass().getName());
169+
}
170+
171+
/**
172+
* Compares two values. Numbers are promoted to {@code double} to allow cross-type
173+
* numeric comparison (e.g. {@code Integer} vs {@code Double}). All other
174+
* {@link Comparable} types are compared directly.
175+
*
176+
* <p>
177+
* Null ordering follows SQL {@code NULLS FIRST} semantics: {@code null} is considered
178+
* less than any non-null value. As a result, a missing metadata key causes ordered
179+
* comparisons ({@code GT}, {@code GTE}, {@code LT}, {@code LTE}) to behave as if the
180+
* key holds the smallest possible value — e.g. {@code year > 2020} returns
181+
* {@code false} when {@code year} is absent.
182+
*/
183+
@SuppressWarnings("unchecked")
184+
private int compare(@Nullable Object metaVal, @Nullable Object filterVal) {
185+
if (metaVal == null && filterVal == null) {
186+
return 0;
187+
}
188+
if (metaVal == null) {
189+
return -1;
190+
}
191+
if (filterVal == null) {
192+
return 1;
193+
}
194+
if (metaVal instanceof Number n1 && filterVal instanceof Number n2) {
195+
return Double.compare(n1.doubleValue(), n2.doubleValue());
196+
}
197+
if (Objects.equals(metaVal, filterVal)) {
198+
return 0;
199+
}
200+
if (metaVal instanceof Comparable comparable && filterVal instanceof Comparable) {
201+
try {
202+
return comparable.compareTo(filterVal);
203+
}
204+
catch (ClassCastException ex) {
205+
throw new IllegalArgumentException("Cannot compare values of incompatible types %s and %s"
206+
.formatted(metaVal.getClass().getName(), filterVal.getClass().getName()), ex);
207+
}
208+
}
209+
throw new IllegalArgumentException("Cannot compare values of types %s and %s"
210+
.formatted(metaVal.getClass().getName(), filterVal.getClass().getName()));
211+
}
212+
213+
private List<?> asList(Object value, Filter.Expression expression) {
214+
if (value instanceof List<?> list) {
215+
return list;
216+
}
217+
throw new IllegalArgumentException(
218+
"Expected a List value for %s expression but got: %s".formatted(expression.type(), value));
219+
}
220+
221+
}

0 commit comments

Comments
 (0)