Skip to content

Commit f9c8bd1

Browse files
committed
feat: add index metadata contract to prevent incompatible provider searches
Store embedding provider, model, and dimensions in SQLite metadata. Refuse to search if current provider is incompatible with indexed data. User must run index with force=true to rebuild when provider changes.
1 parent c340991 commit f9c8bd1

File tree

4 files changed

+212
-0
lines changed

4 files changed

+212
-0
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ graph TD
131131
- **Native speed**: Core logic written in Rust for maximum performance
132132
- **Memory efficient**: F16 vector quantization reduces index size by 50%
133133
- **Branch-aware**: Automatically tracks which chunks exist on each git branch
134+
- **Provider validation**: Detects embedding provider/model changes and requires rebuild to prevent garbage results
134135

135136
## 🌿 Branch-Aware Indexing
136137

TROUBLESHOOTING.md

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ Common issues and solutions for opencode-codebase-index.
88
- [No Embedding Provider Available](#no-embedding-provider-available)
99
- [Rate Limiting Errors](#rate-limiting-errors)
1010
- [Index Corruption / Stale Results](#index-corruption--stale-results)
11+
- [Embedding Provider Changed](#embedding-provider-changed)
1112
- [Native Module Build Failures](#native-module-build-failures)
1213
- [Slow Indexing Performance](#slow-indexing-performance)
1314
- [Search Returns No Results](#search-returns-no-results)
@@ -189,6 +190,47 @@ The next `/index` will rebuild from scratch.
189190

190191
---
191192

193+
## Embedding Provider Changed
194+
195+
**Error message:**
196+
```
197+
Index incompatible: <reason>. Run index with force=true to rebuild.
198+
```
199+
200+
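**Cause:** The index was built with a different embedding provider or model than what's currently configured. Embeddings from different models can differ in dimensions, and even when the dimensions match they are not comparable with each other, so an index built with one model cannot be searched with another.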
201+
202+
**Common scenarios:**
203+
- Switched from GitHub Copilot to OpenAI
204+
- Changed Ollama embedding model
205+
- Updated to a new version of the embedding model
206+
207+
**Solutions:**
208+
209+
### Force Re-index
210+
Ask the agent:
211+
> "Force reindex the codebase"
212+
213+
Or run `/index` with the force option. This will:
214+
1. Delete all existing embeddings
215+
2. Re-index all files with the new provider
216+
217+
### Why This Happens
218+
Embedding models differ in the size of the vectors they produce, and vectors from different models are not comparable even when their dimensions happen to match:
219+
220+
| Provider | Model | Dimensions |
221+
|----------|-------|------------|
222+
| GitHub Copilot | text-embedding-3-small | 1536 |
223+
| OpenAI | text-embedding-3-small | 1536 |
224+
| Google | text-embedding-004 | 768 |
225+
| Ollama | nomic-embed-text | 768 |
226+
227+
Mixing embeddings from different providers would produce garbage search results, so the plugin refuses to search until you rebuild the index.
228+
229+
### Check Current Index Metadata
230+
Run `/status` to see what provider/model the index was built with.
231+
232+
---
233+
192234
## Native Module Build Failures
193235

194236
**Error messages:**
@@ -395,6 +437,7 @@ If none of these solutions work:
395437
| No provider | `export OPENAI_API_KEY=...` or use Ollama |
396438
| Rate limited | Switch to Ollama for large codebases |
397439
| Stale results | `rm -rf .opencode/index/` and re-index |
440+
| Provider changed | Force re-index: ask agent to "force reindex" |
398441
| Slow indexing | Use Ollama locally |
399442
| No results | Run `/index` first, use descriptive queries |
400443
| Native module error | Rebuild with Rust toolchain |

src/indexer/index.ts

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,26 @@ interface FailedBatch {
9595
lastAttempt: string;
9696
}
9797

98+
/**
 * Embedding configuration recorded for an index, mirroring the `index.*`
 * keys kept in the database metadata store.
 */
interface IndexMetadata {
  /** Value of the `index.version` key (metadata schema version). */
  indexVersion: string;
  /** Value of the `index.embeddingProvider` key. */
  embeddingProvider: string;
  /** Value of the `index.embeddingModel` key. */
  embeddingModel: string;
  /** Vector size parsed from the `index.embeddingDimensions` key. */
  embeddingDimensions: number;
  /** Timestamp string from the `index.createdAt` key. */
  createdAt: string;
  /** Timestamp string from the `index.updatedAt` key. */
  updatedAt: string;
}
106+
107+
/**
 * Outcome of comparing the metadata stored with an index against the
 * currently detected embedding provider.
 */
interface IndexCompatibility {
  /** `false` when the stored index must not be searched with the current provider. */
  compatible: boolean;
  /** Human-readable explanation; set when `compatible` is `false`. */
  reason?: string;
  /** Metadata read from the index, when any was stored. */
  storedMetadata?: IndexMetadata;
  /** Provider name in effect when the check ran. */
  currentProvider?: string;
  /** Model name in effect when the check ran. */
  currentModel?: string;
  /** Vector size of the current model when the check ran. */
  currentDimensions?: number;
}
115+
116+
// Schema version written to the `index.version` metadata key whenever index metadata is saved.
const INDEX_METADATA_VERSION = "1";
117+
98118
export class Indexer {
99119
private config: ParsedCodebaseIndexConfig;
100120
private projectRoot: string;
@@ -114,6 +134,7 @@ export class Indexer {
114134
private readonly maxQueryCacheSize = 100;
115135
private readonly queryCacheTtlMs = 5 * 60 * 1000;
116136
private readonly querySimilarityThreshold = 0.85;
137+
private indexCompatibility: IndexCompatibility | null = null;
117138

118139
constructor(projectRoot: string, config: ParsedCodebaseIndexConfig) {
119140
this.projectRoot = projectRoot;
@@ -254,6 +275,19 @@ export class Indexer {
254275
this.migrateFromLegacyIndex();
255276
}
256277

278+
this.indexCompatibility = this.validateIndexCompatibility(this.detectedProvider);
279+
if (!this.indexCompatibility.compatible) {
280+
this.logger.warn("Index compatibility issue detected", {
281+
reason: this.indexCompatibility.reason,
282+
storedProvider: this.indexCompatibility.storedMetadata?.embeddingProvider,
283+
storedModel: this.indexCompatibility.storedMetadata?.embeddingModel,
284+
storedDimensions: this.indexCompatibility.storedMetadata?.embeddingDimensions,
285+
currentProvider: this.indexCompatibility.currentProvider,
286+
currentModel: this.indexCompatibility.currentModel,
287+
currentDimensions: this.indexCompatibility.currentDimensions,
288+
});
289+
}
290+
257291
if (isGitRepo(this.projectRoot)) {
258292
this.currentBranch = getBranchOrDefault(this.projectRoot);
259293
this.baseBranch = getBaseBranch(this.projectRoot);
@@ -339,6 +373,92 @@ export class Indexer {
339373
this.database.addChunksToBranchBatch(this.currentBranch || "default", chunkIds);
340374
}
341375

376+
/**
 * Reads the stored index metadata back from the database.
 *
 * @returns The stored metadata, or `null` when no database is open or the
 *   `index.version` key has not been written yet.
 */
private loadIndexMetadata(): IndexMetadata | null {
  const database = this.database;
  if (!database) return null;

  const version = database.getMetadata("index.version");
  if (!version) return null;

  // Missing keys read as an empty string.
  const read = (key: string): string => database.getMetadata(key) ?? "";

  const embeddingProvider = read("index.embeddingProvider");
  const embeddingModel = read("index.embeddingModel");

  // A missing or non-numeric value yields 0 instead of NaN, so the field stays
  // a real number and mismatch messages never print "NaND vectors".
  const parsedDimensions = Number.parseInt(read("index.embeddingDimensions"), 10);
  const embeddingDimensions = Number.isNaN(parsedDimensions) ? 0 : parsedDimensions;

  return {
    indexVersion: version,
    embeddingProvider,
    embeddingModel,
    embeddingDimensions,
    createdAt: read("index.createdAt"),
    updatedAt: read("index.updatedAt"),
  };
}
391+
392+
/**
 * Writes the embedding provider, model and dimensions of `provider` to the
 * database metadata, together with the metadata schema version.
 *
 * `index.updatedAt` is refreshed on every call; `index.createdAt` is written
 * only when no value is stored yet. Does nothing when no database is open.
 *
 * @param provider - The embedding provider whose details are recorded.
 */
private saveIndexMetadata(provider: DetectedProvider): void {
  const db = this.database;
  if (!db) return;

  const timestamp = new Date().toISOString();
  // Looked up before any write so an existing creation time is left untouched.
  const previousCreatedAt = db.getMetadata("index.createdAt");

  const put = (key: string, value: string): void => {
    db.setMetadata(key, value);
  };

  put("index.version", INDEX_METADATA_VERSION);
  put("index.embeddingProvider", provider.provider);
  put("index.embeddingModel", provider.modelInfo.model);
  put("index.embeddingDimensions", provider.modelInfo.dimensions.toString());
  put("index.updatedAt", timestamp);

  if (previousCreatedAt) return;
  put("index.createdAt", timestamp);
}
408+
409+
/**
 * Compares the metadata stored with the index against `provider`.
 *
 * An index without stored metadata is treated as compatible. Otherwise the
 * vector dimensions are compared first and the model name second; the first
 * difference found makes the index incompatible and fills in `reason`.
 *
 * @param provider - The currently detected embedding provider.
 * @returns The verdict, including stored and current values whenever
 *   metadata was found.
 */
private validateIndexCompatibility(provider: DetectedProvider): IndexCompatibility {
  const storedMetadata = this.loadIndexMetadata();
  if (!storedMetadata) {
    return { compatible: true };
  }

  const currentProvider = provider.provider;
  const { model: currentModel, dimensions: currentDimensions } = provider.modelInfo;

  // Shared by every result below; spread last to keep the original key order.
  const context = { storedMetadata, currentProvider, currentModel, currentDimensions };

  const {
    embeddingProvider: indexedProvider,
    embeddingModel: indexedModel,
    embeddingDimensions: indexedDimensions,
  } = storedMetadata;

  let reason: string | undefined;
  if (indexedDimensions !== currentDimensions) {
    reason = `Dimension mismatch: index has ${indexedDimensions}D vectors (${indexedProvider}/${indexedModel}), but current provider uses ${currentDimensions}D (${currentProvider}/${currentModel}). Run "index --force" to rebuild.`;
  } else if (indexedModel !== currentModel) {
    reason = `Model mismatch: index was built with "${indexedModel}", but current model is "${currentModel}". Embeddings may be incompatible. Run "index --force" to rebuild.`;
  }

  if (reason === undefined) {
    return { compatible: true, ...context };
  }
  return { compatible: false, reason, ...context };
}
450+
451+
/**
 * Returns the compatibility result held in `indexCompatibility`.
 *
 * @returns The stored result, or `{ compatible: true }` when none has been
 *   recorded yet.
 */
checkCompatibility(): IndexCompatibility {
  return this.indexCompatibility ?? { compatible: true };
}
457+
458+
/**
 * Tells whether the index is flagged as incompatible.
 *
 * @returns `true` when `checkCompatibility()` reports an incompatible index.
 */
requiresRebuild(): boolean {
  const { compatible } = this.checkCompatibility();
  return !compatible;
}
461+
342462
private async ensureInitialized(): Promise<{
343463
store: VectorStore;
344464
provider: EmbeddingProviderInterface;
@@ -750,6 +870,9 @@ export class Indexer {
750870

751871
stats.durationMs = Date.now() - startTime;
752872

873+
this.saveIndexMetadata(detectedProvider);
874+
this.indexCompatibility = { compatible: true };
875+
753876
this.logger.recordIndexingEnd();
754877
this.logger.info("Indexing complete", {
755878
files: stats.totalFiles,
@@ -884,6 +1007,11 @@ export class Indexer {
8841007
name?: string;
8851008
}>
8861009
> {
1010+
const compatibility = this.checkCompatibility();
1011+
if (!compatibility.compatible) {
1012+
throw new Error(compatibility.reason ?? "Index is incompatible with current embedding provider");
1013+
}
1014+
8871015
const searchStartTime = performance.now();
8881016
const { store, provider, database } = await this.ensureInitialized();
8891017

tests/database.test.ts

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,46 @@ describe("Database", () => {
208208
});
209209
});
210210

211+
// Exercises the `index.*` metadata keys through the generic metadata store.
describe("index metadata contract", () => {
  it("should store index metadata fields", () => {
    db.setMetadata("index.version", "1");
    db.setMetadata("index.embeddingProvider", "openai");
    db.setMetadata("index.embeddingModel", "text-embedding-3-small");
    db.setMetadata("index.embeddingDimensions", "1536");
    db.setMetadata("index.createdAt", "2025-01-19T00:00:00Z");
    db.setMetadata("index.updatedAt", "2025-01-19T00:00:00Z");

    expect(db.getMetadata("index.version")).toBe("1");
    expect(db.getMetadata("index.embeddingProvider")).toBe("openai");
    expect(db.getMetadata("index.embeddingModel")).toBe("text-embedding-3-small");
    expect(db.getMetadata("index.embeddingDimensions")).toBe("1536");
    // The timestamps are written above, so verify them as well.
    expect(db.getMetadata("index.createdAt")).toBe("2025-01-19T00:00:00Z");
    expect(db.getMetadata("index.updatedAt")).toBe("2025-01-19T00:00:00Z");
  });

  it("should persist index metadata across database reopening", () => {
    const dbPath = path.join(tempDir, "persist-test.db");
    const db1 = new Database(dbPath);

    db1.setMetadata("index.embeddingProvider", "ollama");
    db1.setMetadata("index.embeddingDimensions", "768");

    // A second handle on the same file must see what the first one wrote.
    const db2 = new Database(dbPath);

    expect(db2.getMetadata("index.embeddingProvider")).toBe("ollama");
    expect(db2.getMetadata("index.embeddingDimensions")).toBe("768");
  });

  it("should update index metadata on reindex", () => {
    db.setMetadata("index.embeddingProvider", "openai");
    db.setMetadata("index.embeddingDimensions", "1536");
    db.setMetadata("index.updatedAt", "2025-01-18T00:00:00Z");

    // Writing the same key again replaces the value and leaves other keys alone.
    db.setMetadata("index.updatedAt", "2025-01-19T12:00:00Z");

    expect(db.getMetadata("index.embeddingProvider")).toBe("openai");
    expect(db.getMetadata("index.updatedAt")).toBe("2025-01-19T12:00:00Z");
  });
});
250+
211251
describe("garbage collection", () => {
212252
it("should gc orphan embeddings", () => {
213253
const embedding = Buffer.from(new Float32Array([1.0]).buffer);

0 commit comments

Comments
 (0)