|
| 1 | +const std = @import("std"); |
| 2 | +const storage = @import("storage.zig"); |
| 3 | + |
/// Reason a preflight check failed, carrying the data needed to print an
/// actionable message for the user.
///
/// Only `schema_mismatch.stored_model` is released by `deinit`; every other
/// slice field is `[]const u8` and is never freed here, so whoever supplied it
/// must keep it alive for as long as the failure value is used.
pub const PreflightFailure = union(enum) {
    /// The embedding server could not be contacted.
    server_unreachable: struct {
        url: []const u8,
        dialect: []const u8,
    },
    /// The index database could not be opened or prepared.
    db_open_failed: struct {
        path: []const u8,
        /// Name of the underlying error (e.g. from `@errorName`).
        err_name: []const u8,
    },
    /// The index on disk disagrees with the current embedding settings.
    schema_mismatch: struct {
        model_mismatch: bool,
        dim_mismatch: bool,
        /// Heap-allocated; freed by `deinit`. `null` when unknown or already released.
        stored_model: ?[]u8,
        current_model: []const u8,
        stored_dim: ?usize,
        current_dim: usize,
    },

    /// Frees memory owned by this failure using `allocator`, which must be the
    /// allocator that produced `stored_model`. Calling it again is a no-op
    /// because the owned slice is cleared after being released.
    pub fn deinit(self: *PreflightFailure, allocator: std.mem.Allocator) void {
        switch (self.*) {
            .schema_mismatch => |*mismatch| {
                const owned = mismatch.stored_model orelse return;
                mismatch.stored_model = null;
                allocator.free(owned);
            },
            // These variants hold no memory that this type frees.
            .server_unreachable, .db_open_failed => {},
        }
    }
};
| 32 | + |
/// Opens the index database at `db_path`, runs `storage.initSchema` with the
/// requested embedding settings, and reports whether the result flags a
/// mismatch.
///
/// Returns `null` when neither mismatch flag is set. Otherwise returns:
///   - `.db_open_failed` when opening the database or initializing the schema
///     fails; `err_name` is the `@errorName` of the underlying error.
///   - `.schema_mismatch` when the schema result flags a model and/or
///     dimension mismatch.
///
/// Ownership: a returned `.schema_mismatch` takes over the stored model string
/// from the schema result; release it with `PreflightFailure.deinit` using the
/// same `allocator`. `path` and `current_model` alias `db_path` and
/// `embedding_model`, so those slices must outlive the returned value.
pub fn checkIndexConsistency(
    allocator: std.mem.Allocator,
    db_path: []const u8,
    embedding_model: []const u8,
    embedding_dim: usize,
) !?PreflightFailure {
    const db = storage.openFileWithVec(allocator, db_path) catch |err| {
        return PreflightFailure{ .db_open_failed = .{
            .path = db_path,
            .err_name = @errorName(err),
        } };
    };
    defer storage.close(db);

    var schema_result = storage.initSchema(allocator, db, .{
        .embedding_dim = embedding_dim,
        .embedding_model = embedding_model,
    }) catch |err| {
        return PreflightFailure{ .db_open_failed = .{
            .path = db_path,
            .err_name = @errorName(err),
        } };
    };
    // Release the schema result on every exit path, including the mismatch
    // path below. The stored model is detached (set to null) before returning,
    // so this deinit is expected to skip it rather than free it a second time.
    defer schema_result.deinit(allocator);

    if (!schema_result.embedding_model_mismatch and !schema_result.embedding_dim_mismatch) {
        return null;
    }

    // Transfer ownership of the stored model string to the returned failure.
    const stored_model = schema_result.stored_embedding_model;
    schema_result.stored_embedding_model = null;

    return PreflightFailure{ .schema_mismatch = .{
        .model_mismatch = schema_result.embedding_model_mismatch,
        .dim_mismatch = schema_result.embedding_dim_mismatch,
        .stored_model = stored_model,
        .current_model = embedding_model,
        .stored_dim = schema_result.stored_embedding_dim,
        .current_dim = embedding_dim,
    } };
}
| 74 | + |
/// Writes a human-readable, actionable description of `failure` to `writer`.
///
/// Every variant ends with the line " watcher NOT started." and includes at
/// least one suggested fix. For `.schema_mismatch`, one error line is emitted
/// per flagged mismatch (model and/or dimension). A missing stored model or
/// stored dimension is rendered as "unknown" rather than a placeholder value.
///
/// Errors: propagates any error returned by `writer`.
pub fn formatActionable(failure: PreflightFailure, writer: *std.Io.Writer) !void {
    switch (failure) {
        .server_unreachable => |s| {
            try writer.print("error: cannot reach embedding server at {s}\n", .{s.url});
            try writer.print(" configured dialect: {s}\n", .{s.dialect});
            try writer.writeAll(" fix one of:\n");
            try writer.writeAll(" - start the embedding server\n");
            try writer.writeAll(" - update embedding_url / embedding_api in .codescan/config\n");
            try writer.writeAll(" - run 'codescan setup-model' for setup instructions\n");
            try writer.writeAll(" watcher NOT started.\n");
        },
        .db_open_failed => |s| {
            try writer.print("error: cannot open index database at {s} ({s})\n", .{ s.path, s.err_name });
            try writer.writeAll(" fix: run 'codescan index' to (re)create the database.\n");
            try writer.writeAll(" watcher NOT started.\n");
        },
        .schema_mismatch => |m| {
            if (m.model_mismatch) {
                try writer.print(
                    "error: Embedding model mismatch. Index was built with '{s}', but current model is '{s}'.\n",
                    .{ m.stored_model orelse "unknown", m.current_model },
                );
            }
            if (m.dim_mismatch) {
                if (m.stored_dim) |stored_dim| {
                    try writer.print(
                        "error: Embedding dimension mismatch. Index was built with {d}, but current setting is {d}.\n",
                        .{ stored_dim, m.current_dim },
                    );
                } else {
                    // Do not report a made-up dimension of 0 when none was recorded.
                    try writer.print(
                        "error: Embedding dimension mismatch. Index was built with unknown, but current setting is {d}.\n",
                        .{m.current_dim},
                    );
                }
            }
            try writer.writeAll(" fix: run 'codescan index' to rebuild the index with the current model.\n");
            try writer.writeAll(" watcher NOT started.\n");
        },
    }
}
| 109 | + |
| 110 | +// ============================================================================ |
| 111 | +// Tests |
| 112 | +// ============================================================================ |
| 113 | + |
| 114 | +const model = @import("model.zig"); |
| 115 | +const io_singleton = @import("io_singleton.zig"); |
| 116 | + |
/// Test helper: creates a database file named `path` inside `dir`, initializes
/// its schema with the given embedding settings, and inserts one symbol.
///
/// Returns the joined path (real path of `dir` + `path`). The caller owns the
/// returned slice and must free it with `allocator`. On error nothing is
/// returned and the path buffer is released here.
fn writeTempDbWithSchema(
    allocator: std.mem.Allocator,
    dir: std.Io.Dir,
    path: []const u8,
    embedding_model: []const u8,
    embedding_dim: usize,
) ![]u8 {
    const abs_path = try dir.realPathFileAlloc(io_singleton.getOrInit(), ".", allocator);
    defer allocator.free(abs_path);
    const full_path = try std.fs.path.join(allocator, &.{ abs_path, path });
    // Every step below can fail; without this the joined path would leak.
    errdefer allocator.free(full_path);

    const db = try storage.openFileWithVec(allocator, full_path);
    defer storage.close(db);

    var schema_result = try storage.initSchema(allocator, db, .{
        .embedding_dim = embedding_dim,
        .embedding_model = embedding_model,
    });
    defer schema_result.deinit(allocator);

    // Insert a symbol to mark index as populated — required for mismatch detection
    var sym = model.Symbol{
        .language = try allocator.dupe(u8, "zig"),
        .file_path = try allocator.dupe(u8, "src/a.zig"),
        .name = try allocator.dupe(u8, "a"),
        .signature = try allocator.dupe(u8, "fn a() void"),
        .doc_comment = null,
        .start_line = 1,
        .end_line = 1,
    };
    defer sym.deinit(allocator);
    _ = try storage.insertSymbol(db, sym);

    return full_path;
}
| 152 | + |
// Verifies that an index created with a given model/dimension passes the
// consistency check when queried with the same settings.
test "checkIndexConsistency returns null when model and dim match" {
    const allocator = std.testing.allocator;
    var tmp = std.testing.tmpDir(.{});
    defer tmp.cleanup();

    const path = try writeTempDbWithSchema(allocator, tmp.dir, "ok.sqlite3", "bge-large", 1024);
    defer allocator.free(path);

    if (try checkIndexConsistency(allocator, path, "bge-large", 1024)) |unexpected| {
        // Release anything the unexpected failure owns so the test fails for
        // the real reason instead of also reporting a leak.
        var owned = unexpected;
        owned.deinit(allocator);
        return error.TestUnexpectedResult;
    }
}
| 164 | + |
// An index built with one model must be reported as a model-only mismatch when
// checked against a different model of the same dimension.
test "checkIndexConsistency detects model mismatch on populated index" {
    const gpa = std.testing.allocator;
    var tmp_dir = std.testing.tmpDir(.{});
    defer tmp_dir.cleanup();

    const db_path = try writeTempDbWithSchema(gpa, tmp_dir.dir, "mm.sqlite3", "bge-large", 1024);
    defer gpa.free(db_path);

    const outcome = try checkIndexConsistency(gpa, db_path, "jina-code-embeddings-1.5b-mlx", 1024);
    var failure = outcome orelse return error.TestUnexpectedResult;
    defer failure.deinit(gpa);

    try std.testing.expect(failure == .schema_mismatch);
    const mismatch = failure.schema_mismatch;
    try std.testing.expect(mismatch.model_mismatch);
    try std.testing.expect(!mismatch.dim_mismatch);

    const stored = mismatch.stored_model orelse return error.TestUnexpectedResult;
    try std.testing.expectEqualStrings("bge-large", stored);
    try std.testing.expectEqualStrings("jina-code-embeddings-1.5b-mlx", mismatch.current_model);
}
| 186 | + |
// Changing both the model and the dimension must set both mismatch flags and
// report the stored and current dimensions.
test "checkIndexConsistency detects both model and dim mismatch (dirtree scenario)" {
    const gpa = std.testing.allocator;
    var tmp_dir = std.testing.tmpDir(.{});
    defer tmp_dir.cleanup();

    const db_path = try writeTempDbWithSchema(gpa, tmp_dir.dir, "dual.sqlite3", "bge-large", 1024);
    defer gpa.free(db_path);

    const outcome = try checkIndexConsistency(gpa, db_path, "jina-code-embeddings-1.5b-mlx", 1536);
    var failure = outcome orelse return error.TestUnexpectedResult;
    defer failure.deinit(gpa);

    try std.testing.expect(failure == .schema_mismatch);
    const mismatch = failure.schema_mismatch;
    try std.testing.expect(mismatch.model_mismatch);
    try std.testing.expect(mismatch.dim_mismatch);
    try std.testing.expectEqual(@as(?usize, 1024), mismatch.stored_dim);
    try std.testing.expectEqual(@as(usize, 1536), mismatch.current_dim);
}
| 207 | + |
// The rendered message for a double mismatch must mention both problems, the
// rebuild command, and that the watcher was not started.
test "formatActionable for schema_mismatch includes both errors and fix" {
    var sink: std.Io.Writer.Allocating = .init(std.testing.allocator);
    defer sink.deinit();

    const mismatch: PreflightFailure = .{ .schema_mismatch = .{
        .model_mismatch = true,
        .dim_mismatch = true,
        .stored_model = null,
        .current_model = "jina-code-embeddings-1.5b-mlx",
        .stored_dim = 1024,
        .current_dim = 1536,
    } };
    try formatActionable(mismatch, &sink.writer);

    const rendered = sink.written();
    const required_fragments = [_][]const u8{
        "Embedding model mismatch",
        "Embedding dimension mismatch",
        "codescan index",
        "watcher NOT started",
    };
    for (required_fragments) |fragment| {
        try std.testing.expect(std.mem.indexOf(u8, rendered, fragment) != null);
    }
}
0 commit comments