Skip to content

Commit 52a7468

Browse files
committed
Fix Java vector search NullPointerException and dirty data handling
- Replace List.of() with Arrays.asList() to handle null values in MongoDB aggregation pipeline
- Add safe type conversion for year field to handle dirty data (e.g., '1994è1998')
- Improve error handling with specific catch blocks for IOException and InterruptedException
- Add validation for Voyage AI API response structure
- Remove debug logging statements
1 parent 57fe2b6 commit 52a7468

1 file changed

Lines changed: 89 additions & 33 deletions

File tree

server/java-spring/src/main/java/com/mongodb/samplemflix/service/MovieServiceImpl.java

Lines changed: 89 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -863,7 +863,7 @@ public List<VectorSearchResult> vectorSearchMovies(String query, Integer limit)
863863
// Step 1: Get movie IDs and scores from embedded_movies (which has the vector embeddings)
864864
List<ObjectId> movieIds = new ArrayList<>();
865865
Map<String, Double> scoreMap = new HashMap<>();
866-
866+
867867
mongoTemplate.getCollection("embedded_movies")
868868
.aggregate(aggregationPipeline)
869869
.forEach(doc -> {
@@ -873,44 +873,81 @@ public List<VectorSearchResult> vectorSearchMovies(String query, Integer limit)
873873
});
874874

875875
// Step 2: Fetch complete movie data from the movies collection (for CRUD compatibility)
876+
// Use aggregation to safely handle dirty data in the year field
876877
List<VectorSearchResult> results = new ArrayList<>();
878+
877879
if (!movieIds.isEmpty()) {
878-
Query movieQuery = new Query(Criteria.where("_id").in(movieIds));
879-
List<Movie> movies = mongoTemplate.find(movieQuery, Movie.class);
880-
881-
// Log if there's a mismatch between collections
882-
if (movies.size() != movieIds.size()) {
883-
System.out.println("Warning: Found " + movieIds.size() +
884-
" movies in embedded_movies but only " + movies.size() +
885-
" in movies collection for vector search");
886-
}
887-
888-
// Convert to VectorSearchResult with scores preserved
889-
for (Movie movie : movies) {
890-
String movieIdStr = movie.getId().toString();
891-
Double score = scoreMap.get(movieIdStr);
892-
893-
if (score != null) { // Only include movies that have vector scores
894-
VectorSearchResult result = VectorSearchResult.builder()
895-
.id(movieIdStr)
896-
.title(movie.getTitle())
897-
.plot(movie.getPlot())
898-
.poster(movie.getPoster())
899-
.year(movie.getYear())
900-
.genres(movie.getGenres())
901-
.directors(movie.getDirectors())
902-
.cast(movie.getCast())
903-
.score(score)
904-
.build();
905-
results.add(result);
906-
}
907-
}
880+
// Build aggregation pipeline to safely convert year field
881+
Document matchStage = new Document("$match", new Document("_id", new Document("$in", movieIds)));
882+
883+
// Project stage to safely convert year to integer, handling dirty data
884+
Document projectStage2 = new Document("$project", new Document()
885+
.append("_id", 1)
886+
.append("title", 1)
887+
.append("plot", 1)
888+
.append("poster", 1)
889+
.append("genres", 1)
890+
.append("directors", 1)
891+
.append("cast", 1)
892+
// Safely convert year to integer, handling strings and dirty data
893+
.append("year", new Document("$cond", new Document()
894+
.append("if", new Document("$and", java.util.Arrays.asList(
895+
new Document("$ne", java.util.Arrays.asList("$year", null)),
896+
new Document("$eq", java.util.Arrays.asList(new Document("$type", "$year"), "int"))
897+
)))
898+
.append("then", "$year")
899+
.append("else", null)
900+
))
901+
);
902+
903+
List<Document> moviePipeline = List.of(matchStage, projectStage2);
904+
905+
// Execute aggregation and manually build VectorSearchResult objects
906+
mongoTemplate.getCollection("movies").aggregate(moviePipeline)
907+
.forEach(doc -> {
908+
ObjectId movieIdObj = doc.getObjectId("_id");
909+
if (movieIdObj == null) {
910+
return;
911+
}
912+
913+
String movieIdStr = movieIdObj.toString();
914+
Double score = scoreMap.get(movieIdStr);
915+
916+
if (score != null) { // Only include movies that have vector scores
917+
// Safely get list fields, defaulting to null if not present
918+
List<String> genres = doc.getList("genres", String.class);
919+
List<String> directors = doc.getList("directors", String.class);
920+
List<String> cast = doc.getList("cast", String.class);
921+
922+
VectorSearchResult result = VectorSearchResult.builder()
923+
.id(movieIdStr)
924+
.title(doc.getString("title"))
925+
.plot(doc.getString("plot"))
926+
.poster(doc.getString("poster"))
927+
.year(doc.getInteger("year")) // Will be null for dirty data
928+
.genres(genres)
929+
.directors(directors)
930+
.cast(cast)
931+
.score(score)
932+
.build();
933+
results.add(result);
934+
}
935+
});
908936
}
909937

910938
return results;
911939

940+
} catch (IOException e) {
941+
// Handle Voyage AI API errors
942+
String errorMsg = e.getMessage() != null ? e.getMessage() : "Network error calling Voyage AI API";
943+
throw new DatabaseOperationException("Error performing vector search: " + errorMsg);
944+
} catch (InterruptedException e) {
945+
Thread.currentThread().interrupt();
946+
throw new DatabaseOperationException("Vector search was interrupted");
912947
} catch (Exception e) {
913-
throw new DatabaseOperationException("Error performing vector search: " + e.getMessage());
948+
// Handle other errors (e.g., MongoDB errors, parsing errors)
949+
String errorMsg = e.getMessage() != null ? e.getMessage() : e.getClass().getSimpleName();
950+
throw new DatabaseOperationException("Error performing vector search: " + errorMsg);
914951
}
915952
}
916953

@@ -956,7 +993,26 @@ private List<Double> generateVoyageEmbedding(String text, String apiKey) throws
956993
// Parse the JSON response to extract the embedding
957994
ObjectMapper mapper = new ObjectMapper();
958995
JsonNode root = mapper.readTree(response.body());
959-
JsonNode embeddingNode = root.path("data").get(0).path("embedding");
996+
997+
// Validate response structure
998+
if (!root.has("data")) {
999+
throw new IOException("Invalid Voyage AI API response: missing 'data' field. Response: " + response.body());
1000+
}
1001+
1002+
JsonNode dataNode = root.get("data");
1003+
if (dataNode == null || !dataNode.isArray() || dataNode.size() == 0) {
1004+
throw new IOException("Invalid Voyage AI API response: 'data' field is empty or not an array. Response: " + response.body());
1005+
}
1006+
1007+
JsonNode firstElement = dataNode.get(0);
1008+
if (firstElement == null || !firstElement.has("embedding")) {
1009+
throw new IOException("Invalid Voyage AI API response: missing 'embedding' field. Response: " + response.body());
1010+
}
1011+
1012+
JsonNode embeddingNode = firstElement.get("embedding");
1013+
if (embeddingNode == null || !embeddingNode.isArray()) {
1014+
throw new IOException("Invalid Voyage AI API response: 'embedding' is not an array. Response: " + response.body());
1015+
}
9601016

9611017
// Convert the embedding to a List<Double>
9621018
List<Double> embedding = new ArrayList<>();

0 commit comments

Comments
 (0)