Skip to content

Commit 010b146

Browse files
committed
Fix MongoDB Atlas Search returning too many results for multi-word queries
Use compound queries with AND logic for directors, writers, and cast fields to require ALL search terms to match, preventing 'james cameron' from matching any director with 'James' OR 'Cameron'.

Changes:
- Split multi-word queries into individual terms
- Wrap terms in compound 'must' clause (AND logic)
- Adjust fuzzy settings: maxEdits=1, prefixLength=2 for better typo tolerance without over-matching (e.g., prevents 'james' matching 'jane')

Single-word queries continue to use simple text operator with fuzzy matching.

Affected files:
- Python FastAPI: mflix/server/python-fastapi/src/routers/movies.py
- Express TypeScript: mflix/server/js-express/src/controllers/movieController.ts
- Express types: mflix/server/js-express/src/types/index.ts
- Java Spring: mflix/server/java-spring/src/main/java/com/mongodb/samplemflix/service/MovieServiceImpl.java
1 parent 5e6f4a0 commit 010b146

4 files changed

Lines changed: 247 additions & 61 deletions

File tree

mflix/server/java-spring/src/main/java/com/mongodb/samplemflix/service/MovieServiceImpl.java

Lines changed: 81 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -634,30 +634,94 @@ public List<Movie> searchMovies(MovieSearchRequest searchRequest) {
634634
));
635635
}
636636

637-
// Add directors search if provided (using phrase operator for exact phrase matching)
638-
// This ensures that searching for "james cameron" only matches documents where
639-
// "James Cameron" appears as an exact phrase, not documents containing "James" OR "Cameron".
637+
// Add directors search if provided
638+
// Split multi-word queries into individual terms and require ALL terms to match (AND logic).
639+
// This prevents "james cameron" from matching any director with "James" OR "Cameron",
640+
// while still allowing fuzzy matching for typo tolerance.
641+
// Fuzzy settings: maxEdits=1 allows up to 1 character edit, prefixLength=2 requires
642+
// only the first 2 characters to match exactly before fuzzy matching kicks in.
640643
if (searchRequest.getDirectors() != null && !searchRequest.getDirectors().trim().isEmpty()) {
641-
searchPhrases.add(new Document("phrase", new Document()
642-
.append("query", searchRequest.getDirectors().trim())
643-
.append("path", Movie.Fields.DIRECTORS)
644-
));
644+
String[] directorTerms = searchRequest.getDirectors().trim().split("\\s+");
645+
if (directorTerms.length == 1) {
646+
searchPhrases.add(new Document("text", new Document()
647+
.append("query", searchRequest.getDirectors().trim())
648+
.append("path", Movie.Fields.DIRECTORS)
649+
.append("fuzzy", new Document()
650+
.append("maxEdits", 1)
651+
.append("prefixLength", 2)
652+
)
653+
));
654+
} else {
655+
// Use compound must clause to require all terms match (AND logic)
656+
java.util.List<Document> mustClauses = java.util.Arrays.stream(directorTerms)
657+
.filter(term -> !term.isEmpty())
658+
.map(term -> new Document("text", new Document()
659+
.append("query", term)
660+
.append("path", Movie.Fields.DIRECTORS)
661+
.append("fuzzy", new Document()
662+
.append("maxEdits", 1)
663+
.append("prefixLength", 2)
664+
)
665+
))
666+
.collect(java.util.stream.Collectors.toList());
667+
searchPhrases.add(new Document("compound", new Document("must", mustClauses)));
668+
}
645669
}
646670

647-
// Add writers search if provided (using phrase operator for exact phrase matching)
671+
// Add writers search if provided (see directors comments for AND logic explanation)
648672
if (searchRequest.getWriters() != null && !searchRequest.getWriters().trim().isEmpty()) {
649-
searchPhrases.add(new Document("phrase", new Document()
650-
.append("query", searchRequest.getWriters().trim())
651-
.append("path", Movie.Fields.WRITERS)
652-
));
673+
String[] writerTerms = searchRequest.getWriters().trim().split("\\s+");
674+
if (writerTerms.length == 1) {
675+
searchPhrases.add(new Document("text", new Document()
676+
.append("query", searchRequest.getWriters().trim())
677+
.append("path", Movie.Fields.WRITERS)
678+
.append("fuzzy", new Document()
679+
.append("maxEdits", 1)
680+
.append("prefixLength", 2)
681+
)
682+
));
683+
} else {
684+
java.util.List<Document> mustClauses = java.util.Arrays.stream(writerTerms)
685+
.filter(term -> !term.isEmpty())
686+
.map(term -> new Document("text", new Document()
687+
.append("query", term)
688+
.append("path", Movie.Fields.WRITERS)
689+
.append("fuzzy", new Document()
690+
.append("maxEdits", 1)
691+
.append("prefixLength", 2)
692+
)
693+
))
694+
.collect(java.util.stream.Collectors.toList());
695+
searchPhrases.add(new Document("compound", new Document("must", mustClauses)));
696+
}
653697
}
654698

655-
// Add cast search if provided (using phrase operator for exact phrase matching)
699+
// Add cast search if provided (see directors comments for AND logic explanation)
656700
if (searchRequest.getCast() != null && !searchRequest.getCast().trim().isEmpty()) {
657-
searchPhrases.add(new Document("phrase", new Document()
658-
.append("query", searchRequest.getCast().trim())
659-
.append("path", Movie.Fields.CAST)
660-
));
701+
String[] castTerms = searchRequest.getCast().trim().split("\\s+");
702+
if (castTerms.length == 1) {
703+
searchPhrases.add(new Document("text", new Document()
704+
.append("query", searchRequest.getCast().trim())
705+
.append("path", Movie.Fields.CAST)
706+
.append("fuzzy", new Document()
707+
.append("maxEdits", 1)
708+
.append("prefixLength", 2)
709+
)
710+
));
711+
} else {
712+
java.util.List<Document> mustClauses = java.util.Arrays.stream(castTerms)
713+
.filter(term -> !term.isEmpty())
714+
.map(term -> new Document("text", new Document()
715+
.append("query", term)
716+
.append("path", Movie.Fields.CAST)
717+
.append("fuzzy", new Document()
718+
.append("maxEdits", 1)
719+
.append("prefixLength", 2)
720+
)
721+
))
722+
.collect(java.util.stream.Collectors.toList());
723+
searchPhrases.add(new Document("compound", new Document("must", mustClauses)));
724+
}
661725
}
662726

663727
// Build the $search aggregation stage with compound operator

mflix/server/js-express/src/controllers/movieController.ts

Lines changed: 72 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -578,34 +578,85 @@ export async function searchMovies(req: Request, res: Response): Promise<void> {
578578
});
579579
}
580580

581-
// The phrase operator performs an exact phrase match on the specified field.
582-
// This ensures that searching for "james cameron" only matches documents where
583-
// "James Cameron" appears as an exact phrase, not documents containing "James" OR "Cameron".
581+
// Split multi-word queries into individual terms and require ALL terms to match (AND logic).
582+
// This prevents "james cameron" from matching any director with "James" OR "Cameron",
583+
// while still allowing fuzzy matching for typo tolerance.
584+
// Fuzzy settings: maxEdits=1 allows up to 1 character edit, prefixLength=2 requires
585+
// only the first 2 characters to match exactly before fuzzy matching kicks in.
584586
if (directors) {
585-
searchPhrases.push({
586-
phrase: {
587-
query: directors,
588-
path: "directors",
589-
},
590-
});
587+
const directorTerms = directors.split(/\s+/).filter((t) => t.length > 0);
588+
if (directorTerms.length === 1) {
589+
searchPhrases.push({
590+
text: {
591+
query: directors,
592+
path: "directors",
593+
fuzzy: { maxEdits: 1, prefixLength: 2 },
594+
},
595+
});
596+
} else {
597+
// Use compound must clause to require all terms match (AND logic)
598+
searchPhrases.push({
599+
compound: {
600+
must: directorTerms.map((term) => ({
601+
text: {
602+
query: term,
603+
path: "directors",
604+
fuzzy: { maxEdits: 1, prefixLength: 2 },
605+
},
606+
})),
607+
},
608+
});
609+
}
591610
}
592611

593612
if (writers) {
594-
searchPhrases.push({
595-
phrase: {
596-
query: writers,
597-
path: "writers",
598-
},
599-
});
613+
const writerTerms = writers.split(/\s+/).filter((t) => t.length > 0);
614+
if (writerTerms.length === 1) {
615+
searchPhrases.push({
616+
text: {
617+
query: writers,
618+
path: "writers",
619+
fuzzy: { maxEdits: 1, prefixLength: 2 },
620+
},
621+
});
622+
} else {
623+
searchPhrases.push({
624+
compound: {
625+
must: writerTerms.map((term) => ({
626+
text: {
627+
query: term,
628+
path: "writers",
629+
fuzzy: { maxEdits: 1, prefixLength: 2 },
630+
},
631+
})),
632+
},
633+
});
634+
}
600635
}
601636

602637
if (cast) {
603-
searchPhrases.push({
604-
phrase: {
605-
query: cast,
606-
path: "cast",
607-
},
608-
});
638+
const castTerms = cast.split(/\s+/).filter((t) => t.length > 0);
639+
if (castTerms.length === 1) {
640+
searchPhrases.push({
641+
text: {
642+
query: cast,
643+
path: "cast",
644+
fuzzy: { maxEdits: 1, prefixLength: 2 },
645+
},
646+
});
647+
} else {
648+
searchPhrases.push({
649+
compound: {
650+
must: castTerms.map((term) => ({
651+
text: {
652+
query: term,
653+
path: "cast",
654+
fuzzy: { maxEdits: 1, prefixLength: 2 },
655+
},
656+
})),
657+
},
658+
});
659+
}
609660
}
610661

611662
if (searchPhrases.length === 0) {

mflix/server/js-express/src/types/index.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,15 @@ export interface SearchPhrase {
298298
path: string;
299299
fuzzy?: { maxEdits: number; prefixLength: number };
300300
};
301+
compound?: {
302+
must?: Array<{
303+
text: {
304+
query: string;
305+
path: string;
306+
fuzzy?: { maxEdits: number; prefixLength: number };
307+
};
308+
}>;
309+
};
301310
}
302311

303312
/**

mflix/server/python-fastapi/src/routers/movies.py

Lines changed: 85 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -154,31 +154,93 @@ async def search_movies(
154154
}
155155
})
156156
if directors is not None:
157-
# The phrase operator performs an exact phrase match on the specified field.
158-
# This ensures that searching for "james cameron" only matches documents where
159-
# "James Cameron" appears as an exact phrase, not documents containing "James" OR "Cameron".
160-
search_phrases.append({
161-
"phrase": {
162-
"query": directors,
163-
"path": "directors",
164-
}
165-
})
157+
# Split multi-word queries into individual terms and require ALL terms to match (AND logic).
158+
# This prevents "james cameron" from matching any director with "James" OR "Cameron".
159+
# The "fuzzy" option enables typo-tolerant search within MongoDB Search.
160+
# - maxEdits: The maximum number of single-character edits (insertions, deletions, or substitutions)
161+
# allowed when matching the search term to indexed terms. (Range: 1-2; higher = more tolerant)
162+
# - prefixLength: The number of initial characters that must exactly match before fuzzy matching is applied.
163+
# (Lower values allow typos earlier in the word but may be slower.)
164+
# For more details, see: https://www.mongodb.com/docs/atlas/atlas-search/operators-collectors/text/
165+
director_terms = directors.split()
166+
if len(director_terms) == 1:
167+
search_phrases.append({
168+
"text": {
169+
"query": directors,
170+
"path": "directors",
171+
"fuzzy": {"maxEdits": 1, "prefixLength": 2}
172+
}
173+
})
174+
else:
175+
# Use compound must clause to require all terms match (AND logic)
176+
search_phrases.append({
177+
"compound": {
178+
"must": [
179+
{
180+
"text": {
181+
"query": term,
182+
"path": "directors",
183+
"fuzzy": {"maxEdits": 1, "prefixLength": 2}
184+
}
185+
}
186+
for term in director_terms
187+
]
188+
}
189+
})
190+
166191
if writers is not None:
167-
# The phrase operator performs an exact phrase match on the specified field.
168-
search_phrases.append({
169-
"phrase": {
170-
"query": writers,
171-
"path": "writers",
172-
}
173-
})
192+
# See comments above regarding fuzzy search and AND logic for multi-word queries.
193+
writer_terms = writers.split()
194+
if len(writer_terms) == 1:
195+
search_phrases.append({
196+
"text": {
197+
"query": writers,
198+
"path": "writers",
199+
"fuzzy": {"maxEdits": 1, "prefixLength": 2}
200+
}
201+
})
202+
else:
203+
search_phrases.append({
204+
"compound": {
205+
"must": [
206+
{
207+
"text": {
208+
"query": term,
209+
"path": "writers",
210+
"fuzzy": {"maxEdits": 1, "prefixLength": 2}
211+
}
212+
}
213+
for term in writer_terms
214+
]
215+
}
216+
})
217+
174218
if cast is not None:
175-
# The phrase operator performs an exact phrase match on the specified field.
176-
search_phrases.append({
177-
"phrase": {
178-
"query": cast,
179-
"path": "cast",
180-
}
181-
})
219+
# See comments above regarding fuzzy search and AND logic for multi-word queries.
220+
cast_terms = cast.split()
221+
if len(cast_terms) == 1:
222+
search_phrases.append({
223+
"text": {
224+
"query": cast,
225+
"path": "cast",
226+
"fuzzy": {"maxEdits": 1, "prefixLength": 2}
227+
}
228+
})
229+
else:
230+
search_phrases.append({
231+
"compound": {
232+
"must": [
233+
{
234+
"text": {
235+
"query": term,
236+
"path": "cast",
237+
"fuzzy": {"maxEdits": 1, "prefixLength": 2}
238+
}
239+
}
240+
for term in cast_terms
241+
]
242+
}
243+
})
182244

183245
if not search_phrases:
184246
raise HTTPException(

0 commit comments

Comments
 (0)