Skip to content

Commit 6cf9190

Browse files
committed
Use compound should clauses for better search result ranking
Create a scoring hierarchy for directors, writers, and cast searches: (1) phrase match (highest score) - the exact phrase appears within a single array element; (2) text match without fuzzy (high score) - all terms present with exact spelling; (3) text match with fuzzy (lower score) - typo-tolerant fallback. This ensures a movie directed by 'James Cameron' ranks higher than movies with separate 'James Mangold' + 'Cameron Crowe' directors, while still supporting typo tolerance via fuzzy matching. Also increased maxEdits from 1 to 2 for better typo tolerance on the fuzzy fallback.
1 parent ba3902f commit 6cf9190

4 files changed

Lines changed: 168 additions & 76 deletions

File tree

mflix/server/java-spring/src/main/java/com/mongodb/samplemflix/service/MovieServiceImpl.java

Lines changed: 68 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -635,47 +635,84 @@ public List<Movie> searchMovies(MovieSearchRequest searchRequest) {
635635
}
636636

637637
// Add directors search if provided
638-
// Use matchCriteria: "all" to require ALL terms in the query to match (AND logic).
639-
// This prevents "james cameron" from matching any director with "James" OR "Cameron",
640-
// while still allowing fuzzy matching for typo tolerance.
641-
// Fuzzy settings: maxEdits=1 allows up to 1 character edit, prefixLength=2 requires
642-
// only the first 2 characters to match exactly before fuzzy matching kicks in.
638+
// Use compound operator with "should" clauses to create a scoring hierarchy:
639+
// 1. phrase match (highest score) - exact phrase in same array element
640+
// 2. text match without fuzzy (high score) - all terms present, exact spelling
641+
// 3. text match with fuzzy (lower score) - typo-tolerant fallback
642+
// This ensures "James Cameron" ranks higher than "James Mangold" + "Cameron Crowe"
643+
// while still supporting typo tolerance.
643644
// For more details, see: https://www.mongodb.com/docs/atlas/atlas-search/operators-collectors/text/
644645
if (searchRequest.getDirectors() != null && !searchRequest.getDirectors().trim().isEmpty()) {
645-
searchPhrases.add(new Document("text", new Document()
646-
.append("query", searchRequest.getDirectors().trim())
647-
.append("path", Movie.Fields.DIRECTORS)
648-
.append("matchCriteria", "all")
649-
.append("fuzzy", new Document()
650-
.append("maxEdits", 1)
651-
.append("prefixLength", 2)
652-
)
646+
String directorsQuery = searchRequest.getDirectors().trim();
647+
searchPhrases.add(new Document("compound", new Document()
648+
.append("should", java.util.Arrays.asList(
649+
// Highest score: exact phrase match
650+
new Document("phrase", new Document()
651+
.append("query", directorsQuery)
652+
.append("path", Movie.Fields.DIRECTORS)),
653+
// High score: exact text match (all terms, no fuzzy)
654+
new Document("text", new Document()
655+
.append("query", directorsQuery)
656+
.append("path", Movie.Fields.DIRECTORS)
657+
.append("matchCriteria", "all")),
658+
// Lower score: fuzzy match (typo tolerance)
659+
new Document("text", new Document()
660+
.append("query", directorsQuery)
661+
.append("path", Movie.Fields.DIRECTORS)
662+
.append("matchCriteria", "all")
663+
.append("fuzzy", new Document()
664+
.append("maxEdits", 2)
665+
.append("prefixLength", 2)))
666+
))
667+
.append("minimumShouldMatch", 1)
653668
));
654669
}
655670

656-
// Add writers search if provided (see directors comments for matchCriteria explanation)
671+
// Add writers search if provided (see directors comments for compound should scoring hierarchy)
657672
if (searchRequest.getWriters() != null && !searchRequest.getWriters().trim().isEmpty()) {
658-
searchPhrases.add(new Document("text", new Document()
659-
.append("query", searchRequest.getWriters().trim())
660-
.append("path", Movie.Fields.WRITERS)
661-
.append("matchCriteria", "all")
662-
.append("fuzzy", new Document()
663-
.append("maxEdits", 1)
664-
.append("prefixLength", 2)
665-
)
673+
String writersQuery = searchRequest.getWriters().trim();
674+
searchPhrases.add(new Document("compound", new Document()
675+
.append("should", java.util.Arrays.asList(
676+
new Document("phrase", new Document()
677+
.append("query", writersQuery)
678+
.append("path", Movie.Fields.WRITERS)),
679+
new Document("text", new Document()
680+
.append("query", writersQuery)
681+
.append("path", Movie.Fields.WRITERS)
682+
.append("matchCriteria", "all")),
683+
new Document("text", new Document()
684+
.append("query", writersQuery)
685+
.append("path", Movie.Fields.WRITERS)
686+
.append("matchCriteria", "all")
687+
.append("fuzzy", new Document()
688+
.append("maxEdits", 2)
689+
.append("prefixLength", 2)))
690+
))
691+
.append("minimumShouldMatch", 1)
666692
));
667693
}
668694

669-
// Add cast search if provided (see directors comments for matchCriteria explanation)
695+
// Add cast search if provided (see directors comments for compound should scoring hierarchy)
670696
if (searchRequest.getCast() != null && !searchRequest.getCast().trim().isEmpty()) {
671-
searchPhrases.add(new Document("text", new Document()
672-
.append("query", searchRequest.getCast().trim())
673-
.append("path", Movie.Fields.CAST)
674-
.append("matchCriteria", "all")
675-
.append("fuzzy", new Document()
676-
.append("maxEdits", 1)
677-
.append("prefixLength", 2)
678-
)
697+
String castQuery = searchRequest.getCast().trim();
698+
searchPhrases.add(new Document("compound", new Document()
699+
.append("should", java.util.Arrays.asList(
700+
new Document("phrase", new Document()
701+
.append("query", castQuery)
702+
.append("path", Movie.Fields.CAST)),
703+
new Document("text", new Document()
704+
.append("query", castQuery)
705+
.append("path", Movie.Fields.CAST)
706+
.append("matchCriteria", "all")),
707+
new Document("text", new Document()
708+
.append("query", castQuery)
709+
.append("path", Movie.Fields.CAST)
710+
.append("matchCriteria", "all")
711+
.append("fuzzy", new Document()
712+
.append("maxEdits", 2)
713+
.append("prefixLength", 2)))
714+
))
715+
.append("minimumShouldMatch", 1)
679716
));
680717
}
681718

mflix/server/js-express/src/controllers/movieController.ts

Lines changed: 53 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -578,42 +578,74 @@ export async function searchMovies(req: Request, res: Response): Promise<void> {
578578
});
579579
}
580580

581-
// For directors, writers, and cast fields, we use matchCriteria: "all" to require ALL terms
582-
// in the query to match (AND logic). This prevents "james cameron" from matching any director
583-
// with "James" OR "Cameron" (which would return too many results), while still allowing
584-
// fuzzy matching for typo tolerance.
585-
// Fuzzy settings: maxEdits=1 allows up to 1 character edit, prefixLength=2 requires
586-
// only the first 2 characters to match exactly before fuzzy matching kicks in.
581+
// Use compound operator with "should" clauses to create a scoring hierarchy:
582+
// 1. phrase match (highest score) - exact phrase in same array element
583+
// 2. text match without fuzzy (high score) - all terms present, exact spelling
584+
// 3. text match with fuzzy (lower score) - typo-tolerant fallback
585+
// This ensures "James Cameron" ranks higher than "James Mangold" + "Cameron Crowe"
586+
// while still supporting typo tolerance.
587587
// For more details, see: https://www.mongodb.com/docs/atlas/atlas-search/operators-collectors/text/
588588
if (directors) {
589589
searchPhrases.push({
590-
text: {
591-
query: directors,
592-
path: "directors",
593-
matchCriteria: "all",
594-
fuzzy: { maxEdits: 1, prefixLength: 2 },
590+
compound: {
591+
should: [
592+
// Highest score: exact phrase match
593+
{ phrase: { query: directors, path: "directors" } },
594+
// High score: exact text match (all terms, no fuzzy)
595+
{ text: { query: directors, path: "directors", matchCriteria: "all" } },
596+
// Lower score: fuzzy match (typo tolerance)
597+
{
598+
text: {
599+
query: directors,
600+
path: "directors",
601+
matchCriteria: "all",
602+
fuzzy: { maxEdits: 2, prefixLength: 2 },
603+
},
604+
},
605+
],
606+
minimumShouldMatch: 1,
595607
},
596608
});
597609
}
598610

599611
if (writers) {
612+
// See comments above regarding compound should scoring hierarchy.
600613
searchPhrases.push({
601-
text: {
602-
query: writers,
603-
path: "writers",
604-
matchCriteria: "all",
605-
fuzzy: { maxEdits: 1, prefixLength: 2 },
614+
compound: {
615+
should: [
616+
{ phrase: { query: writers, path: "writers" } },
617+
{ text: { query: writers, path: "writers", matchCriteria: "all" } },
618+
{
619+
text: {
620+
query: writers,
621+
path: "writers",
622+
matchCriteria: "all",
623+
fuzzy: { maxEdits: 2, prefixLength: 2 },
624+
},
625+
},
626+
],
627+
minimumShouldMatch: 1,
606628
},
607629
});
608630
}
609631

610632
if (cast) {
633+
// See comments above regarding compound should scoring hierarchy.
611634
searchPhrases.push({
612-
text: {
613-
query: cast,
614-
path: "cast",
615-
matchCriteria: "all",
616-
fuzzy: { maxEdits: 1, prefixLength: 2 },
635+
compound: {
636+
should: [
637+
{ phrase: { query: cast, path: "cast" } },
638+
{ text: { query: cast, path: "cast", matchCriteria: "all" } },
639+
{
640+
text: {
641+
query: cast,
642+
path: "cast",
643+
matchCriteria: "all",
644+
fuzzy: { maxEdits: 2, prefixLength: 2 },
645+
},
646+
},
647+
],
648+
minimumShouldMatch: 1,
617649
},
618650
});
619651
}

mflix/server/js-express/src/types/index.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,18 @@ export interface SearchPhrase {
299299
matchCriteria?: "any" | "all";
300300
fuzzy?: { maxEdits: number; prefixLength: number };
301301
};
302+
compound?: {
303+
should?: Array<{
304+
phrase?: { query: string; path: string };
305+
text?: {
306+
query: string;
307+
path: string;
308+
matchCriteria?: "any" | "all";
309+
fuzzy?: { maxEdits: number; prefixLength: number };
310+
};
311+
}>;
312+
minimumShouldMatch?: number;
313+
};
302314
}
303315

304316
/**

mflix/server/python-fastapi/src/routers/movies.py

Lines changed: 35 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -154,42 +154,53 @@ async def search_movies(
154154
}
155155
})
156156
if directors is not None:
157-
# Use matchCriteria: "all" to require ALL terms in the query to match (AND logic).
158-
# This prevents "james cameron" from matching any director with "James" OR "Cameron".
159-
# The "fuzzy" option enables typo-tolerant search within MongoDB Search.
160-
# - maxEdits: The maximum number of single-character edits (insertions, deletions, or substitutions)
161-
# allowed when matching the search term to indexed terms. (Range: 1-2; higher = more tolerant)
162-
# - prefixLength: The number of initial characters that must exactly match before fuzzy matching is applied.
163-
# (Lower values allow typos earlier in the word but may be slower.)
157+
# Use compound operator with "should" clauses to create a scoring hierarchy:
158+
# 1. phrase match (highest score) - exact phrase in same array element
159+
# 2. text match without fuzzy (high score) - all terms present, exact spelling
160+
# 3. text match with fuzzy (lower score) - typo-tolerant fallback
161+
# This ensures "James Cameron" ranks higher than "James Mangold" + "Cameron Crowe"
162+
# while still supporting typo tolerance.
164163
# For more details, see: https://www.mongodb.com/docs/atlas/atlas-search/operators-collectors/text/
165164
search_phrases.append({
166-
"text": {
167-
"query": directors,
168-
"path": "directors",
169-
"matchCriteria": "all",
170-
"fuzzy": {"maxEdits": 1, "prefixLength": 2}
165+
"compound": {
166+
"should": [
167+
# Highest score: exact phrase match
168+
{"phrase": {"query": directors, "path": "directors"}},
169+
# High score: exact text match (all terms, no fuzzy)
170+
{"text": {"query": directors, "path": "directors", "matchCriteria": "all"}},
171+
# Lower score: fuzzy match (typo tolerance)
172+
{"text": {"query": directors, "path": "directors", "matchCriteria": "all",
173+
"fuzzy": {"maxEdits": 2, "prefixLength": 2}}}
174+
],
175+
"minimumShouldMatch": 1
171176
}
172177
})
173178

174179
if writers is not None:
175-
# See comments above regarding fuzzy search and matchCriteria for multi-word queries.
180+
# See comments above regarding compound should scoring hierarchy.
176181
search_phrases.append({
177-
"text": {
178-
"query": writers,
179-
"path": "writers",
180-
"matchCriteria": "all",
181-
"fuzzy": {"maxEdits": 1, "prefixLength": 2}
182+
"compound": {
183+
"should": [
184+
{"phrase": {"query": writers, "path": "writers"}},
185+
{"text": {"query": writers, "path": "writers", "matchCriteria": "all"}},
186+
{"text": {"query": writers, "path": "writers", "matchCriteria": "all",
187+
"fuzzy": {"maxEdits": 2, "prefixLength": 2}}}
188+
],
189+
"minimumShouldMatch": 1
182190
}
183191
})
184192

185193
if cast is not None:
186-
# See comments above regarding fuzzy search and matchCriteria for multi-word queries.
194+
# See comments above regarding compound should scoring hierarchy.
187195
search_phrases.append({
188-
"text": {
189-
"query": cast,
190-
"path": "cast",
191-
"matchCriteria": "all",
192-
"fuzzy": {"maxEdits": 1, "prefixLength": 2}
196+
"compound": {
197+
"should": [
198+
{"phrase": {"query": cast, "path": "cast"}},
199+
{"text": {"query": cast, "path": "cast", "matchCriteria": "all"}},
200+
{"text": {"query": cast, "path": "cast", "matchCriteria": "all",
201+
"fuzzy": {"maxEdits": 2, "prefixLength": 2}}}
202+
],
203+
"minimumShouldMatch": 1
193204
}
194205
})
195206

0 commit comments

Comments
 (0)