From 135325245c9b3ebc69e44a4191449b8c65d1d4d2 Mon Sep 17 00:00:00 2001
From: Helweg <kenneth@helweghansen.dk>
Date: Thu, 9 Apr 2026 08:48:33 +0200
Subject: [PATCH 01/20] feat: add pluggable reranker config

---
 src/config/schema.ts | 104 ++++++++++++++++++++++++++++---------------
 tests/config.test.ts |  69 ++++++++++++++++++++++++++++
 2 files changed, 137 insertions(+), 36 deletions(-)

diff --git a/src/config/schema.ts b/src/config/schema.ts
index f71d3dc..be005ff 100644
--- a/src/config/schema.ts
+++ b/src/config/schema.ts
@@ -49,6 +49,25 @@ export interface SearchConfig {
   contextLines: number;
 }
 
+export type RerankerProvider = "cohere" | "jina" | "custom";
+
+export interface RerankerConfig {
+  /** Whether to enable reranking. Default: false */
+  enabled: boolean;
+  /** Provider shortcut for hosted rerank APIs. Use 'custom' to provide only baseUrl. Unrecognized values are parsed as 'custom'. */
+  provider: RerankerProvider;
+  /** Model name for reranking. Required when enabled; parseConfig throws if it is missing or blank. */
+  model: string;
+  /** Base URL of the rerank API endpoint, stored without trailing slashes. Falls back to the provider default for 'cohere'/'jina'; required for 'custom'. */
+  baseUrl: string;
+  /** API key for the rerank service. Required when provider is 'cohere' or 'jina'. */
+  apiKey?: string;
+  /** Number of top documents to rerank. Default: 15; parsed values are floored and clamped to 1-50. */
+  topN: number;
+  /** Request timeout in milliseconds. Default: 10000; parsed values are floored and raised to at least 1000. */
+  timeoutMs: number;
+}
+
 export type LogLevel = "error" | "warn" | "info" | "debug";
 
 export interface DebugConfig {
@@ -83,21 +102,6 @@ export interface CustomProviderConfig {
   max_batch_size?: number;
 }
 
-export interface RerankerConfig {
-  /** Whether to enable reranking. Default: false */
-  enabled: boolean;
-  /** Base URL of the rerank API endpoint (e.g. "https://api.siliconflow.cn/v1") */
-  baseUrl: string;
-  /** Model name for reranking (e.g. "BAAI/bge-reranker-v2-m3") */
-  model: string;
-  /** API key for the rerank service */
-  apiKey?: string;
-  /** Number of top documents to rerank. Default: 20 */
-  topN?: number;
-  /** Request timeout in milliseconds. Default: 30000 */
-  timeoutMs?: number;
-}
-
 export interface CodebaseIndexConfig {
   embeddingProvider: EmbeddingProvider | 'custom' | 'auto';
   embeddingModel?: EmbeddingModelName;
@@ -164,6 +168,21 @@ function isValidFusionStrategy(value: unknown): value is SearchConfig["fusionStr
   return value === "weighted" || value === "rrf";
 }
 
+function isValidRerankerProvider(value: unknown): value is RerankerProvider {
+  return value === "cohere" || value === "jina" || value === "custom";
+}
+
+function getDefaultRerankerBaseUrl(provider: RerankerProvider): string {
+  switch (provider) {
+    case "cohere":
+      return "https://api.cohere.ai/v1";
+    case "jina":
+      return "https://api.jina.ai/v1";
+    case "custom":
+      return "";
+  }
+}
+
 function getDefaultDebugConfig(): DebugConfig {
   return {
     enabled: false,
@@ -177,16 +196,6 @@ function getDefaultDebugConfig(): DebugConfig {
   };
 }
 
-function getDefaultRerankerConfig(): RerankerConfig {
-  return {
-    enabled: false,
-    baseUrl: "https://api.siliconflow.cn/v1",
-    model: "BAAI/bge-reranker-v2-m3",
-    topN: 20,
-    timeoutMs: 30000,
-  };
-}
-
 const VALID_SCOPES: IndexScope[] = ["project", "global"];
 const VALID_LOG_LEVELS: LogLevel[] = ["error", "warn", "info", "debug"];
 
@@ -282,17 +291,6 @@ export function parseConfig(raw: unknown): ParsedCodebaseIndexConfig {
     metrics: typeof rawDebug.metrics === "boolean" ? rawDebug.metrics : defaultDebug.metrics,
   };
 
-  const defaultReranker = getDefaultRerankerConfig();
-  const rawReranker = (input.reranker && typeof input.reranker === "object" ? input.reranker : {}) as Record<string, unknown>;
-  const reranker: RerankerConfig = {
-    enabled: typeof rawReranker.enabled === "boolean" ? rawReranker.enabled : defaultReranker.enabled,
-    baseUrl: typeof rawReranker.baseUrl === "string" ? rawReranker.baseUrl.trim().replace(/\/+$/, '') : defaultReranker.baseUrl,
-    model: typeof rawReranker.model === "string" ? rawReranker.model : defaultReranker.model,
-    apiKey: getResolvedString(rawReranker.apiKey, "$root.reranker.apiKey"),
-    topN: typeof rawReranker.topN === "number" ? Math.max(1, Math.min(200, Math.floor(rawReranker.topN))) : defaultReranker.topN,
-    timeoutMs: typeof rawReranker.timeoutMs === "number" ? Math.max(1000, rawReranker.timeoutMs) : defaultReranker.timeoutMs,
-  };
-
   const rawKnowledgeBases = input.knowledgeBases;
   const knowledgeBases: string[] = isStringArray(rawKnowledgeBases)
     ? rawKnowledgeBases.filter(p => typeof p === "string" && p.trim().length > 0).map(p => p.trim())
@@ -306,6 +304,7 @@ export function parseConfig(raw: unknown): ParsedCodebaseIndexConfig {
   let embeddingProvider: EmbeddingProvider | 'custom' | 'auto';
   let embeddingModel: EmbeddingModelName | undefined = undefined;
   let customProvider: CustomProviderConfig | undefined = undefined;
+  let reranker: RerankerConfig | undefined = undefined;
   
   if (embeddingProviderValue === 'custom') {
     embeddingProvider = 'custom';
@@ -359,6 +358,39 @@ export function parseConfig(raw: unknown): ParsedCodebaseIndexConfig {
     embeddingProvider = 'auto';
   }
 
+  const rawReranker = (input.reranker && typeof input.reranker === "object"
+    ? input.reranker
+    : {}) as Record<string, unknown>;
+  const rerankerEnabled = typeof rawReranker.enabled === "boolean" ? rawReranker.enabled : false;
+  if (rerankerEnabled) {
+    const provider = isValidRerankerProvider(rawReranker.provider) ? rawReranker.provider : "custom";
+    const model = getResolvedString(rawReranker.model, "$root.reranker.model");
+    if (!model || model.trim().length === 0) {
+      throw new Error("reranker is enabled but reranker.model is missing or invalid.");
+    }
+
+    const configuredBaseUrl = getResolvedString(rawReranker.baseUrl, "$root.reranker.baseUrl");
+    const baseUrl = configuredBaseUrl?.trim() || getDefaultRerankerBaseUrl(provider);
+    if (baseUrl.length === 0) {
+      throw new Error("reranker is enabled but reranker.baseUrl is missing or invalid for provider 'custom'.");
+    }
+
+    const apiKey = getResolvedString(rawReranker.apiKey, "$root.reranker.apiKey");
+    if ((provider === "cohere" || provider === "jina") && (!apiKey || apiKey.trim().length === 0)) {
+      throw new Error(`reranker provider '${provider}' requires reranker.apiKey when enabled.`);
+    }
+
+    reranker = {
+      enabled: true,
+      provider,
+      model: model.trim(),
+      baseUrl: baseUrl.replace(/\/+$/, ""),
+      apiKey: apiKey?.trim() || undefined,
+      topN: typeof rawReranker.topN === "number" ? Math.min(50, Math.max(1, Math.floor(rawReranker.topN))) : 15,
+      timeoutMs: typeof rawReranker.timeoutMs === "number" ? Math.max(1000, Math.floor(rawReranker.timeoutMs)) : 10000,
+    };
+  }
+
   return {
     embeddingProvider,
     embeddingModel,
diff --git a/tests/config.test.ts b/tests/config.test.ts
index 59cfc7f..ac71f5d 100644
--- a/tests/config.test.ts
+++ b/tests/config.test.ts
@@ -321,6 +321,75 @@ describe("config schema", () => {
       it("should handle non-object search", () => {
         expect(parseConfig({ search: "invalid" }).search.maxResults).toBe(20);
       });
+
+      it("should parse reranker config when enabled", () => {
+        const config = parseConfig({
+          reranker: {
+            enabled: true,
+            provider: "cohere",
+            model: "rerank-v3.5",
+            apiKey: "test-key",
+            topN: 12,
+            timeoutMs: 4000,
+          },
+        });
+
+        expect(config.reranker).toEqual({
+          enabled: true,
+          provider: "cohere",
+          model: "rerank-v3.5",
+          baseUrl: "https://api.cohere.ai/v1",
+          apiKey: "test-key",
+          topN: 12,
+          timeoutMs: 4000,
+        });
+      });
+
+      it("should require model for enabled reranker", () => {
+        expect(() => parseConfig({
+          reranker: {
+            enabled: true,
+            provider: "cohere",
+            apiKey: "test-key",
+          },
+        })).toThrow("reranker is enabled but reranker.model is missing or invalid.");
+      });
+
+      it("should require apiKey for hosted reranker providers", () => {
+        expect(() => parseConfig({
+          reranker: {
+            enabled: true,
+            provider: "jina",
+            model: "jina-reranker-v2-base-multilingual",
+          },
+        })).toThrow("reranker provider 'jina' requires reranker.apiKey when enabled.");
+      });
+
+      it("should require baseUrl for custom reranker provider", () => {
+        expect(() => parseConfig({
+          reranker: {
+            enabled: true,
+            provider: "custom",
+            model: "custom-reranker",
+          },
+        })).toThrow("reranker is enabled but reranker.baseUrl is missing or invalid for provider 'custom'.");
+      });
+
+      it("should clamp reranker topN and timeoutMs", () => {
+        const config = parseConfig({
+          reranker: {
+            enabled: true,
+            provider: "custom",
+            model: "custom-reranker",
+            baseUrl: "https://rerank.example/v1",
+            topN: 999,
+            timeoutMs: 100,
+          },
+        });
+
+        expect(config.reranker?.topN).toBe(50);
+        expect(config.reranker?.timeoutMs).toBe(1000);
+      });
     });
 
     describe("custom provider config", () => {

From 65945ba034116b310c5e230267c6677bba7a008f Mon Sep 17 00:00:00 2001
From: Helweg <kenneth@helweghansen.dk>
Date: Thu, 9 Apr 2026 08:48:56 +0200
Subject: [PATCH 02/20] feat: add optional external reranker stage

---
 src/indexer/index.ts            | 132 +++++++++++++++++++++++++++++++-
 tests/retrieval-ranking.test.ts |  88 +++++++++++++++++++++
 2 files changed, 218 insertions(+), 2 deletions(-)

diff --git a/src/indexer/index.ts b/src/indexer/index.ts
index b5ccaf2..b1b2c31 100644
--- a/src/indexer/index.ts
+++ b/src/indexer/index.ts
@@ -4,7 +4,7 @@ import { performance } from "perf_hooks";
 import PQueue from "p-queue";
 import pRetry from "p-retry";
 
-import { ParsedCodebaseIndexConfig } from "../config/schema.js";
+import { ParsedCodebaseIndexConfig, type RerankerConfig } from "../config/schema.js";
 import { detectEmbeddingProvider, ConfiguredProviderInfo, tryDetectProvider, createCustomProviderInfo } from "../embeddings/detector.js";
 import {
   createEmbeddingProvider,
@@ -158,6 +158,11 @@ interface FailedBatch {
 
 type RankedCandidate = { id: string; score: number; metadata: ChunkMetadata };
 
+interface RerankDocumentPayload {
+  id: string;
+  text: string;
+}
+
 interface HybridRankOptions {
   fusionStrategy: "weighted" | "rrf";
   rrfK: number;
@@ -343,6 +348,20 @@ function splitNameTokens(name: string): Set<string> {
   return tokens;
 }
 
+function createRerankerDocumentText(candidate: RankedCandidate): string {
+  const parts = [
+    `path: ${candidate.metadata.filePath}`,
+    `chunk_type: ${candidate.metadata.chunkType}`,
+    `language: ${candidate.metadata.language}`,
+  ];
+
+  if (candidate.metadata.name) {
+    parts.push(`name: ${candidate.metadata.name}`);
+  }
+
+  return parts.join("\n");
+}
+
 function chunkTypeBoost(chunkType: string): number {
   switch (chunkType) {
     case "function":
@@ -1448,6 +1467,114 @@ export class Indexer {
     }
   }
 
+  private async rerankCandidatesWithApi(
+    query: string,
+    candidates: RankedCandidate[]
+  ): Promise<RankedCandidate[]> {
+    const reranker = this.config.reranker;
+    if (!reranker || !reranker.enabled || candidates.length <= 1) {
+      return candidates;
+    }
+
+    const topN = Math.min(reranker.topN, candidates.length);
+    const head = candidates.slice(0, topN);
+    const tail = candidates.slice(topN);
+    const documents = head.map((candidate) => ({
+      id: candidate.id,
+      text: createRerankerDocumentText(candidate),
+    }));
+
+    try {
+      const rankedIds = await this.callExternalReranker(query, documents, reranker);
+      if (rankedIds.length === 0) {
+        return candidates;
+      }
+
+      const order = new Map(rankedIds.map((id, index) => [id, index]));
+      const rerankedHead = [...head].sort((a, b) => {
+        const aRank = order.get(a.id) ?? Number.MAX_SAFE_INTEGER;
+        const bRank = order.get(b.id) ?? Number.MAX_SAFE_INTEGER;
+        if (aRank !== bRank) {
+          return aRank - bRank;
+        }
+        if (b.score !== a.score) {
+          return b.score - a.score;
+        }
+        return a.id.localeCompare(b.id);
+      });
+
+      this.logger.search("debug", "Applied external reranker", {
+        provider: reranker.provider,
+        model: reranker.model,
+        candidateCount: head.length,
+      });
+
+      return [...rerankedHead, ...tail];
+    } catch (error) {
+      this.logger.search("warn", "External reranker failed; using deterministic order", {
+        provider: reranker.provider,
+        model: reranker.model,
+        error: getErrorMessage(error),
+      });
+      return candidates;
+    }
+  }
+
+  private async callExternalReranker(
+    query: string,
+    documents: RerankDocumentPayload[],
+    reranker: RerankerConfig
+  ): Promise<string[]> {
+    const headers: Record<string, string> = {
+      "Content-Type": "application/json",
+    };
+    if (reranker.apiKey) {
+      headers.Authorization = `Bearer ${reranker.apiKey}`;
+    }
+
+    const controller = new AbortController();
+    const timeout = setTimeout(() => controller.abort(), reranker.timeoutMs);
+    try {
+      const response = await fetch(`${reranker.baseUrl}/rerank`, {
+        method: "POST",
+        headers,
+        body: JSON.stringify({
+          model: reranker.model,
+          query,
+          documents: documents.map((document) => document.text),
+          top_n: documents.length,
+          return_documents: false,
+        }),
+        signal: controller.signal,
+      });
+
+      if (!response.ok) {
+        throw new Error(`Reranker API error: ${response.status} - ${await response.text()}`);
+      }
+
+      const body = await response.json() as {
+        results?: Array<{ index?: number; relevance_score?: number }>;
+      };
+      if (!Array.isArray(body.results)) {
+        throw new Error("Reranker API returned unexpected response format.");
+      }
+
+      return body.results
+        .map((result) => {
+          const index = typeof result.index === "number" ? result.index : -1;
+          return documents[index]?.id;
+        })
+        .filter((id): id is string => typeof id === "string");
+    } catch (error) {
+      if (error instanceof Error && error.name === "AbortError") {
+        throw new Error(`Reranker request timed out after ${reranker.timeoutMs}ms`);
+      }
+      throw error;
+    } finally {
+      clearTimeout(timeout);
+    }
+  }
+
   async initialize(): Promise<void> {
     if (this.config.embeddingProvider === 'custom') {
       if (!this.config.customProvider) {
@@ -2477,11 +2604,12 @@ export class Indexer {
       hybridWeight,
       prioritizeSourcePaths: sourceIntent,
     });
+    const rerankedCombined = await this.rerankCandidatesWithApi(query, combined);
     const fusionMs = performance.now() - fusionStartTime;
 
     const rescued = promoteIdentifierMatches(
       query,
-      combined,
+      rerankedCombined,
       semanticCandidates,
       keywordCandidates,
       database,
diff --git a/tests/retrieval-ranking.test.ts b/tests/retrieval-ranking.test.ts
index 11e1900..dab710a 100644
--- a/tests/retrieval-ranking.test.ts
+++ b/tests/retrieval-ranking.test.ts
@@ -2,6 +2,7 @@ import { describe, expect, it } from "vitest";
 
 import type { ChunkMetadata } from "../src/native/index.js";
 import {
+  Indexer,
   extractFilePathHint,
   fuseResultsRrf,
   fuseResultsWeighted,
@@ -11,6 +12,7 @@ import {
   stripFilePathHint,
   rerankResults,
 } from "../src/indexer/index.js";
+import { parseConfig } from "../src/config/schema.js";
 
 type Candidate = { id: string; score: number; metadata: ChunkMetadata };
 
@@ -333,4 +335,90 @@ describe("retrieval ranking", () => {
     const query = "where is createSystem implementation in packages/react/src/styled-system/system.ts";
     expect(stripFilePathHint(query)).toBe("where is createSystem implementation");
   });
+
+  it("applies external reranker ordering when configured", async () => {
+    const config = parseConfig({
+      embeddingProvider: "custom",
+      customProvider: {
+        baseUrl: "http://localhost:11434/v1",
+        model: "mock-embed",
+        dimensions: 8,
+      },
+      reranker: {
+        enabled: true,
+        provider: "custom",
+        model: "mock-reranker",
+        baseUrl: "https://rerank.example/v1",
+        topN: 3,
+      },
+    });
+    const indexer = new Indexer("/repo", config);
+
+    const fetchSpy = globalThis.fetch;
+    globalThis.fetch = (async (input) => {
+      if (String(input).includes("/rerank")) {
+        return new Response(JSON.stringify({
+          results: [
+            { index: 2, relevance_score: 0.99 },
+            { index: 0, relevance_score: 0.72 },
+            { index: 1, relevance_score: 0.4 },
+          ],
+        }), { status: 200 });
+      }
+      return new Response(JSON.stringify({ data: [{ embedding: Array.from({ length: 8 }, () => 0.1) }], usage: { total_tokens: 1 } }), { status: 200 });
+    }) as typeof fetch;
+
+    const candidates: Candidate[] = [
+      { id: "first", score: 0.9, metadata: meta({ filePath: "/repo/src/first.ts", name: "firstThing", chunkType: "function" }) },
+      { id: "second", score: 0.89, metadata: meta({ filePath: "/repo/src/second.ts", name: "secondThing", chunkType: "function" }) },
+      { id: "third", score: 0.88, metadata: meta({ filePath: "/repo/src/third.ts", name: "thirdThing", chunkType: "function" }) },
+    ];
+
+    const reranked = await (indexer as unknown as {
+      rerankCandidatesWithApi(query: string, items: Candidate[]): Promise<Candidate[]>;
+    }).rerankCandidatesWithApi("find third thing", candidates);
+
+    expect(reranked.map((candidate) => candidate.id)).toEqual(["third", "first", "second"]);
+    globalThis.fetch = fetchSpy;
+  });
+
+  it("falls back to deterministic order when external reranker fails", async () => {
+    const config = parseConfig({
+      embeddingProvider: "custom",
+      customProvider: {
+        baseUrl: "http://localhost:11434/v1",
+        model: "mock-embed",
+        dimensions: 8,
+      },
+      reranker: {
+        enabled: true,
+        provider: "custom",
+        model: "mock-reranker",
+        baseUrl: "https://rerank.example/v1",
+        topN: 2,
+      },
+    });
+    const indexer = new Indexer("/repo", config);
+
+    const fetchSpy = globalThis.fetch;
+    globalThis.fetch = (async (input) => {
+      if (String(input).includes("/rerank")) {
+        return new Response("boom", { status: 500 });
+      }
+      return new Response(JSON.stringify({ data: [{ embedding: Array.from({ length: 8 }, () => 0.1) }], usage: { total_tokens: 1 } }), { status: 200 });
+    }) as typeof fetch;
+
+    const candidates: Candidate[] = [
+      { id: "first", score: 0.9, metadata: meta({ filePath: "/repo/src/first.ts", name: "firstThing", chunkType: "function" }) },
+      { id: "second", score: 0.89, metadata: meta({ filePath: "/repo/src/second.ts", name: "secondThing", chunkType: "function" }) },
+      { id: "third", score: 0.88, metadata: meta({ filePath: "/repo/src/third.ts", name: "thirdThing", chunkType: "function" }) },
+    ];
+
+    const reranked = await (indexer as unknown as {
+      rerankCandidatesWithApi(query: string, items: Candidate[]): Promise<Candidate[]>;
+    }).rerankCandidatesWithApi("find third thing", candidates);
+
+    expect(reranked.map((candidate) => candidate.id)).toEqual(["first", "second", "third"]);
+    globalThis.fetch = fetchSpy;
+  });
 });

From 8b044d932de24ca6fc35e412279ba4d1d3566f77 Mon Sep 17 00:00:00 2001
From: Helweg <kenneth@helweghansen.dk>
Date: Sat, 11 Apr 2026 21:16:33 +0200
Subject: [PATCH 03/20] feat: improve external reranker payload quality

---
 README.md                       | 44 ++++++++++----------------
 src/indexer/index.ts            | 55 ++++++++++++++++++++++-----------
 tests/retrieval-ranking.test.ts | 53 ++++++++++++++++++++++++++-----
 3 files changed, 98 insertions(+), 54 deletions(-)

diff --git a/README.md b/README.md
index 5260db7..55085e5 100644
--- a/README.md
+++ b/README.md
@@ -532,18 +532,14 @@ Zero-config by default (uses `auto` mode). Customize in `.opencode/codebase-inde
     "rerankTopN": 20,                         // Deterministic rerank depth
     "contextLines": 0                         // Extra lines before/after match
   },
-
-  // === Reranking API ===
   "reranker": {
-    "enabled": true,                          // Enable API reranking
-    "baseUrl": "https://api.siliconflow.cn/v1",
-    "model": "BAAI/bge-reranker-v2-m3",
-    "apiKey": "{env:SILICONFLOW_API_KEY}",
-    "topN": 20,                               // Number of results to rerank
-    "timeoutMs": 30000                        // Request timeout (ms)
+    "enabled": false,
+    "provider": "cohere",
+    "model": "rerank-v3.5",
+    "apiKey": "{env:RERANK_API_KEY}",
+    "topN": 15,
+    "timeoutMs": 10000
   },
-
-  // === Debug ===
   "debug": {
     "enabled": false,                         // Enable debug logging
     "logLevel": "info",                       // error | warn | info | debug
@@ -604,23 +600,14 @@ String values in `codebase-index.json` can reference environment variables with
 | `rrfK` | `60` | RRF smoothing constant. Higher values flatten rank impact, lower values prioritize top-ranked candidates more strongly |
 | `rerankTopN` | `20` | Deterministic rerank depth cap. Applies lightweight name/path/chunk-type rerank to top-N only |
 | `contextLines` | `0` | Extra lines to include before/after each match |
-| **reranker** | | |
-| `reranker.enabled` | `false` | Enable API-based reranking |
-| `reranker.baseUrl` | - | Rerank API endpoint URL |
-| `reranker.model` | - | Reranking model name (e.g. `BAAI/bge-reranker-v2-m3`) |
-| `reranker.apiKey` | - | API key for reranking service (use `{env:VAR}` for security) |
-| `reranker.topN` | `20` | Number of top results to rerank via API |
-| `reranker.timeoutMs` | `30000` | Rerank API request timeout in milliseconds |
-| **customProvider** | | |
-| `customProvider.baseUrl` | - | Base URL of OpenAI-compatible embeddings API (e.g. `https://api.siliconflow.cn/v1`) |
-| `customProvider.model` | - | Model name (e.g. `BAAI/bge-m3`, `nomic-embed-text`) |
-| `customProvider.dimensions` | - | Vector dimensions (e.g. `1024` for BGE-M3, `768` for nomic-embed-text) |
-| `customProvider.apiKey` | - | API key (use `{env:VAR}` for security) |
-| `customProvider.maxTokens` | `8192` | Max tokens per input text |
-| `customProvider.timeoutMs` | `30000` | Request timeout in milliseconds |
-| `customProvider.concurrency` | `3` | Max concurrent embedding requests |
-| `customProvider.requestIntervalMs` | `1000` | Minimum delay between requests (ms). Set to `0` for local servers |
-| `customProvider.maxBatchSize` | - | Max inputs per `/embeddings` request. Cap for servers with batch limits |
+| **reranker** | | Optional second-stage model reranker for the top candidate pool |
+| `enabled` | `false` | Turn external reranking on/off |
+| `provider` | `"custom"` | Hosted shortcuts: `cohere`, `jina`, or `custom` |
+| `model` | — | Reranker model name. Required when `enabled` is `true` |
+| `baseUrl` | provider default | Override reranker endpoint base URL. `cohere` → `https://api.cohere.ai/v1`, `jina` → `https://api.jina.ai/v1`. Required for `custom`, which has no default |
+| `apiKey` | — | API key for the reranker service. Required when `provider` is `cohere` or `jina`; use `{env:VAR}` to keep it out of the file |
+| `topN` | `15` | Number of top candidates to send to the external reranker (clamped to 1–50) |
+| `timeoutMs` | `10000` | Timeout for external rerank requests, in milliseconds (minimum `1000`) |
 | **debug** | | |
 | `enabled` | `false` | Enable debug logging and metrics collection |
 | `logLevel` | `"info"` | Log level: `error`, `warn`, `info`, `debug` |
@@ -633,9 +620,10 @@ String values in `codebase-index.json` can reference environment variables with
 
 ### Retrieval ranking behavior
 
-- `codebase_search` and `codebase_peek` use the hybrid path: semantic + keyword retrieval → fusion (`fusionStrategy`) → deterministic rerank (`rerankTopN`) → filtering.
+- `codebase_search` and `codebase_peek` use the hybrid path: semantic + keyword retrieval → fusion (`fusionStrategy`) → deterministic rerank (`rerankTopN`) → optional external reranker (`reranker`) → filtering.
 - `find_similar` stays semantic-only: semantic retrieval + deterministic rerank only (no keyword retrieval, no RRF).
 - For compatibility rollbacks, set `search.fusionStrategy` to `"weighted"` to use the legacy weighted fusion path.
+- When enabled, the external reranker receives each candidate's path metadata plus a code snippet read from disk (the chunk's lines with up to two lines of context on either side), so it can distinguish real implementations from docs/tests more reliably.
 - Retrieval benchmark artifacts are separated by role:
   - baseline (versioned): `benchmarks/baselines/retrieval-baseline.json`
   - latest candidate run (generated): `benchmark-results/retrieval-candidate.json`
diff --git a/src/indexer/index.ts b/src/indexer/index.ts
index b1b2c31..bd66dd9 100644
--- a/src/indexer/index.ts
+++ b/src/indexer/index.ts
@@ -348,20 +348,6 @@ function splitNameTokens(name: string): Set<string> {
   return tokens;
 }
 
-function createRerankerDocumentText(candidate: RankedCandidate): string {
-  const parts = [
-    `path: ${candidate.metadata.filePath}`,
-    `chunk_type: ${candidate.metadata.chunkType}`,
-    `language: ${candidate.metadata.language}`,
-  ];
-
-  if (candidate.metadata.name) {
-    parts.push(`name: ${candidate.metadata.name}`);
-  }
-
-  return parts.join("\n");
-}
-
 function chunkTypeBoost(chunkType: string): number {
   switch (chunkType) {
     case "function":
@@ -1479,10 +1465,12 @@ export class Indexer {
     const topN = Math.min(reranker.topN, candidates.length);
     const head = candidates.slice(0, topN);
     const tail = candidates.slice(topN);
-    const documents = head.map((candidate) => ({
-      id: candidate.id,
-      text: createRerankerDocumentText(candidate),
-    }));
+    const documents = await Promise.all(
+      head.map(async (candidate) => ({
+        id: candidate.id,
+        text: await this.createRerankerDocumentText(candidate),
+      }))
+    );
 
     try {
       const rankedIds = await this.callExternalReranker(query, documents, reranker);
@@ -1575,6 +1563,37 @@ export class Indexer {
     }
   }
 
+  private async createRerankerDocumentText(candidate: RankedCandidate): Promise<string> {
+    const parts = [
+      `path: ${candidate.metadata.filePath}`,
+      `chunk_type: ${candidate.metadata.chunkType}`,
+      `language: ${candidate.metadata.language}`,
+      `lines: ${candidate.metadata.startLine}-${candidate.metadata.endLine}`,
+    ];
+
+    if (candidate.metadata.name) {
+      parts.push(`name: ${candidate.metadata.name}`);
+    }
+
+    const intent = isLikelyImplementationPath(candidate.metadata.filePath) ? "implementation" : "doc_or_test";
+    parts.push(`intent_hint: ${intent}`);
+
+    try {
+      const fileContent = await fsPromises.readFile(candidate.metadata.filePath, "utf-8");
+      const lines = fileContent.split("\n");
+      const snippetStartLine = Math.max(1, candidate.metadata.startLine - 2);
+      const snippetEndLine = Math.min(lines.length, candidate.metadata.endLine + 2);
+      const snippet = lines.slice(snippetStartLine - 1, snippetEndLine).join("\n").trim();
+      parts.push("snippet:");
+      parts.push(snippet.length > 0 ? snippet : "[empty]");
+    } catch {
+      parts.push("snippet:");
+      parts.push("[unavailable]");
+    }
+
+    return parts.join("\n");
+  }
+
   async initialize(): Promise<void> {
     if (this.config.embeddingProvider === 'custom') {
       if (!this.config.customProvider) {
diff --git a/tests/retrieval-ranking.test.ts b/tests/retrieval-ranking.test.ts
index dab710a..c286aad 100644
--- a/tests/retrieval-ranking.test.ts
+++ b/tests/retrieval-ranking.test.ts
@@ -1,4 +1,8 @@
-import { describe, expect, it } from "vitest";
+import * as fs from "fs";
+import * as os from "os";
+import * as path from "path";
+
+import { afterEach, describe, expect, it } from "vitest";
 
 import type { ChunkMetadata } from "../src/native/index.js";
 import {
@@ -16,6 +20,8 @@ import { parseConfig } from "../src/config/schema.js";
 
 type Candidate = { id: string; score: number; metadata: ChunkMetadata };
 
+const tempDirs: string[] = [];
+
 function meta(overrides: Partial<ChunkMetadata>): ChunkMetadata {
   return {
     filePath: "/repo/src/unknown.ts",
@@ -28,7 +34,25 @@ function meta(overrides: Partial<ChunkMetadata>): ChunkMetadata {
   };
 }
 
+function createTempFile(relativePath: string, content: string): string {
+  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "reranker-doc-"));
+  tempDirs.push(tempDir);
+  const filePath = path.join(tempDir, relativePath);
+  fs.mkdirSync(path.dirname(filePath), { recursive: true });
+  fs.writeFileSync(filePath, content, "utf-8");
+  return filePath;
+}
+
 describe("retrieval ranking", () => {
+  afterEach(() => {
+    while (tempDirs.length > 0) {
+      const dir = tempDirs.pop();
+      if (dir) {
+        fs.rmSync(dir, { recursive: true, force: true });
+      }
+    }
+  });
+
   it("fuses hybrid results using RRF rank ordering", () => {
     const semantic: Candidate[] = [
       { id: "a", score: 0.91, metadata: meta({ filePath: "/repo/src/auth.ts", name: "validateAuth", chunkType: "function" }) },
@@ -354,9 +378,15 @@ describe("retrieval ranking", () => {
     });
     const indexer = new Indexer("/repo", config);
 
+    const firstPath = createTempFile("src/first.ts", "export function firstThing() {\n  return 'first';\n}\n");
+    const secondPath = createTempFile("src/second.ts", "export function secondThing() {\n  return 'second';\n}\n");
+    const thirdPath = createTempFile("src/third.ts", "export function thirdThing() {\n  return 'third';\n}\n");
+
     const fetchSpy = globalThis.fetch;
-    globalThis.fetch = (async (input) => {
+    let rerankBody: { documents?: string[] } | undefined;
+    globalThis.fetch = (async (input, init) => {
       if (String(input).includes("/rerank")) {
+        rerankBody = JSON.parse(String(init?.body ?? "{}")) as { documents?: string[] };
         return new Response(JSON.stringify({
           results: [
             { index: 2, relevance_score: 0.99 },
@@ -369,9 +399,9 @@ describe("retrieval ranking", () => {
     }) as typeof fetch;
 
     const candidates: Candidate[] = [
-      { id: "first", score: 0.9, metadata: meta({ filePath: "/repo/src/first.ts", name: "firstThing", chunkType: "function" }) },
-      { id: "second", score: 0.89, metadata: meta({ filePath: "/repo/src/second.ts", name: "secondThing", chunkType: "function" }) },
-      { id: "third", score: 0.88, metadata: meta({ filePath: "/repo/src/third.ts", name: "thirdThing", chunkType: "function" }) },
+      { id: "first", score: 0.9, metadata: meta({ filePath: firstPath, name: "firstThing", chunkType: "function", startLine: 1, endLine: 3 }) },
+      { id: "second", score: 0.89, metadata: meta({ filePath: secondPath, name: "secondThing", chunkType: "function", startLine: 1, endLine: 3 }) },
+      { id: "third", score: 0.88, metadata: meta({ filePath: thirdPath, name: "thirdThing", chunkType: "function", startLine: 1, endLine: 3 }) },
     ];
 
     const reranked = await (indexer as unknown as {
@@ -379,6 +409,9 @@ describe("retrieval ranking", () => {
     }).rerankCandidatesWithApi("find third thing", candidates);
 
     expect(reranked.map((candidate) => candidate.id)).toEqual(["third", "first", "second"]);
+    expect(rerankBody?.documents?.[0]).toContain("snippet:");
+    expect(rerankBody?.documents?.[0]).toContain("export function firstThing()");
+    expect(rerankBody?.documents?.[0]).toContain("intent_hint: implementation");
     globalThis.fetch = fetchSpy;
   });
 
@@ -400,6 +433,10 @@ describe("retrieval ranking", () => {
     });
     const indexer = new Indexer("/repo", config);
 
+    const firstPath = createTempFile("src/first.ts", "export function firstThing() {\n  return 'first';\n}\n");
+    const secondPath = createTempFile("src/second.ts", "export function secondThing() {\n  return 'second';\n}\n");
+    const thirdPath = createTempFile("src/third.ts", "export function thirdThing() {\n  return 'third';\n}\n");
+
     const fetchSpy = globalThis.fetch;
     globalThis.fetch = (async (input) => {
       if (String(input).includes("/rerank")) {
@@ -409,9 +446,9 @@ describe("retrieval ranking", () => {
     }) as typeof fetch;
 
     const candidates: Candidate[] = [
-      { id: "first", score: 0.9, metadata: meta({ filePath: "/repo/src/first.ts", name: "firstThing", chunkType: "function" }) },
-      { id: "second", score: 0.89, metadata: meta({ filePath: "/repo/src/second.ts", name: "secondThing", chunkType: "function" }) },
-      { id: "third", score: 0.88, metadata: meta({ filePath: "/repo/src/third.ts", name: "thirdThing", chunkType: "function" }) },
+      { id: "first", score: 0.9, metadata: meta({ filePath: firstPath, name: "firstThing", chunkType: "function", startLine: 1, endLine: 3 }) },
+      { id: "second", score: 0.89, metadata: meta({ filePath: secondPath, name: "secondThing", chunkType: "function", startLine: 1, endLine: 3 }) },
+      { id: "third", score: 0.88, metadata: meta({ filePath: thirdPath, name: "thirdThing", chunkType: "function", startLine: 1, endLine: 3 }) },
     ];
 
     const reranked = await (indexer as unknown as {

From dfbf6e9a601d19e9f5c3923f3383904df22cb2cd Mon Sep 17 00:00:00 2001
From: Helweg <kenneth@helweghansen.dk>
Date: Sat, 11 Apr 2026 21:22:10 +0200
Subject: [PATCH 04/20] feat: guard external reranker intent ordering

---
 src/indexer/index.ts             | 112 +++++++++++++++++++++-----
 tests/search-integration.test.ts | 132 +++++++++++++++++++++++++++++++
 2 files changed, 223 insertions(+), 21 deletions(-)

diff --git a/src/indexer/index.ts b/src/indexer/index.ts
index bd66dd9..6294716 100644
--- a/src/indexer/index.ts
+++ b/src/indexer/index.ts
@@ -163,6 +163,8 @@ interface RerankDocumentPayload {
   text: string;
 }
 
+type ExternalRerankBand = "implementation" | "documentation" | "test" | "other";
+
 interface HybridRankOptions {
   fusionStrategy: "weighted" | "rrf";
   rrfK: number;
@@ -388,6 +390,42 @@ function isLikelyImplementationPath(filePath: string): boolean {
   return true;
 }
 
+/** True when the lower-cased path contains "readme" or its text after the last "." is md, mdx, rst, adoc or txt. */
+function isDocumentationPath(filePath: string): boolean {
+  const normalized = filePath.toLowerCase();
+  return normalized.includes("readme") || ["md", "mdx", "rst", "adoc", "txt"].includes(normalized.slice(normalized.lastIndexOf(".") + 1));
+}
+
+/**
+ * Picks the band that groups a candidate for external reranking.
+ *
+ * Documentation is tested before implementation only when docIntent is true
+ * and preferSourcePaths is false; in every other case implementation is
+ * tested first. Remaining isTestOrDocPath matches are "test", the rest "other".
+ *
+ * @param candidate - Candidate whose metadata filePath and chunkType are read.
+ * @param preferSourcePaths - When true, docIntent does not affect the result.
+ * @param docIntent - Allows documentation to take precedence as described above.
+ * @returns The band assigned to the candidate.
+ */
+function classifyExternalRerankBand(
+  candidate: RankedCandidate,
+  preferSourcePaths: boolean,
+  docIntent: boolean
+): ExternalRerankBand {
+  const { filePath, chunkType } = candidate.metadata;
+  const docOrTest = isTestOrDocPath(filePath);
+  const documentation = isDocumentationPath(filePath);
+  const implementation = isLikelyImplementationPath(filePath) && isImplementationChunkType(chunkType);
+  const docsFirst = docIntent && !preferSourcePaths;
+
+  if (docsFirst && documentation) return "documentation";
+  if (implementation) return "implementation";
+  if (documentation) return "documentation";
+  if (docOrTest) return "test";
+  return "other";
+}
+
 function classifyQueryIntent(tokens: string[]): "source" | "doc_test" {
   const sourceIntentHits = tokens.filter((t) => SOURCE_INTENT_HINTS.has(t)).length;
   const docTestIntentHits = tokens.filter((t) => DOC_TEST_INTENT_HINTS.has(t)).length;
@@ -1462,39 +1500,71 @@ export class Indexer {
       return candidates;
     }
 
+    const queryTokens = Array.from(tokenizeTextForRanking(query));
+    const preferSourcePaths = classifyQueryIntentRaw(query) === "source";
+    const docIntent = classifyDocIntent(queryTokens) === "docs";
+
     const topN = Math.min(reranker.topN, candidates.length);
     const head = candidates.slice(0, topN);
     const tail = candidates.slice(topN);
-    const documents = await Promise.all(
-      head.map(async (candidate) => ({
-        id: candidate.id,
-        text: await this.createRerankerDocumentText(candidate),
-      }))
-    );
+    const grouped = new Map<ExternalRerankBand, RankedCandidate[]>([
+      ["implementation", []],
+      ["documentation", []],
+      ["test", []],
+      ["other", []],
+    ]);
 
-    try {
-      const rankedIds = await this.callExternalReranker(query, documents, reranker);
-      if (rankedIds.length === 0) {
-        return candidates;
-      }
+    for (const candidate of head) {
+      const band = classifyExternalRerankBand(candidate, preferSourcePaths, docIntent);
+      grouped.get(band)?.push(candidate);
+    }
+
+    const orderedBands: ExternalRerankBand[] = preferSourcePaths
+      ? ["implementation", "other", "documentation", "test"]
+      : docIntent
+        ? ["documentation", "implementation", "other", "test"]
+        : ["implementation", "other", "documentation", "test"];
 
-      const order = new Map(rankedIds.map((id, index) => [id, index]));
-      const rerankedHead = [...head].sort((a, b) => {
-        const aRank = order.get(a.id) ?? Number.MAX_SAFE_INTEGER;
-        const bRank = order.get(b.id) ?? Number.MAX_SAFE_INTEGER;
-        if (aRank !== bRank) {
-          return aRank - bRank;
+    try {
+      const rerankedHead: RankedCandidate[] = [];
+      for (const band of orderedBands) {
+        const bandCandidates = grouped.get(band) ?? [];
+        if (bandCandidates.length <= 1) {
+          rerankedHead.push(...bandCandidates);
+          continue;
         }
-        if (b.score !== a.score) {
-          return b.score - a.score;
+
+        const documents = await Promise.all(
+          bandCandidates.map(async (candidate) => ({
+            id: candidate.id,
+            text: await this.createRerankerDocumentText(candidate),
+          }))
+        );
+        const rankedIds = await this.callExternalReranker(query, documents, reranker);
+        if (rankedIds.length === 0) {
+          rerankedHead.push(...bandCandidates);
+          continue;
         }
-        return a.id.localeCompare(b.id);
-      });
+
+        const order = new Map(rankedIds.map((id, index) => [id, index]));
+        rerankedHead.push(...[...bandCandidates].sort((a, b) => {
+          const aRank = order.get(a.id) ?? Number.MAX_SAFE_INTEGER;
+          const bRank = order.get(b.id) ?? Number.MAX_SAFE_INTEGER;
+          if (aRank !== bRank) {
+            return aRank - bRank;
+          }
+          if (b.score !== a.score) {
+            return b.score - a.score;
+          }
+          return a.id.localeCompare(b.id);
+        }));
+      }
 
       this.logger.search("debug", "Applied external reranker", {
         provider: reranker.provider,
         model: reranker.model,
         candidateCount: head.length,
+        bands: orderedBands,
       });
 
       return [...rerankedHead, ...tail];
diff --git a/tests/search-integration.test.ts b/tests/search-integration.test.ts
index df69ffd..b2bbc72 100644
--- a/tests/search-integration.test.ts
+++ b/tests/search-integration.test.ts
@@ -214,4 +214,136 @@ export function rerankResults(query: string) { return rankHybridResults(query);
     expect(withOverride[0]?.filePath).toContain("/app/indexer/index.ts");
     expect(withOverride[0]?.filePath).not.toContain("/README.md");
   });
+
+  it("keeps implementation results ahead of docs even when external reranker prefers docs for implementation intent", async () => {
+    fetchSpy.mockImplementation(async (url, init) => {
+      if (String(url).includes("/rerank")) {
+        return new Response(JSON.stringify({
+          results: [
+            { index: 0, relevance_score: 0.99 },
+            { index: 1, relevance_score: 0.5 },
+          ],
+        }), { status: 200 });
+      }
+      // Non-rerank requests get embeddings: one deterministic 8-value vector per body.input entry.
+      const body = JSON.parse(String(init?.body ?? "{}")) as { input?: string[] };
+      const texts = Array.isArray(body.input) ? body.input : [];
+      const data = texts.map((text) => {
+        let seed = 0;
+        for (const ch of text) {
+          seed = (seed * 31 + ch.charCodeAt(0)) % 1000;
+        }
+        const embedding = Array.from({ length: 8 }, (_, idx) => ((seed + idx * 17) % 997) / 997);
+        return { embedding };
+      });
+
+      return new Response(JSON.stringify({
+        data,
+        usage: { total_tokens: Math.max(1, texts.length * 8) },
+      }), { status: 200 });
+    });
+    // Custom 8-dimension embedding provider, custom reranker enabled, RRF fusion.
+    const config = parseConfig({
+      embeddingProvider: "custom",
+      customProvider: {
+        baseUrl: "http://localhost:11434/v1",
+        model: "mock-embedding-model",
+        dimensions: 8,
+      },
+      reranker: {
+        enabled: true,
+        provider: "custom",
+        model: "mock-reranker",
+        baseUrl: "https://rerank.example/v1",
+        topN: 10,
+      },
+      indexing: {
+        watchFiles: false,
+      },
+      search: {
+        maxResults: 10,
+        minScore: 0,
+        fusionStrategy: "rrf",
+        rrfK: 60,
+        rerankTopN: 20,
+      },
+    });
+
+    const indexer = new Indexer(tempDir, config);
+    await indexer.index();
+
+    const results = await indexer.search("where is rankHybridResults implementation", 5, {
+      metadataOnly: true,
+      filterByBranch: false,
+    });
+    // The top result must come from the implementation file rather than the README.
+    expect(results[0]?.filePath).toContain("/app/indexer/index.ts");
+    expect(results[0]?.filePath).not.toContain("/README.md");
+  });
+
+  it("keeps documentation results ahead of code when external reranker prefers code for doc intent", async () => {
+    fetchSpy.mockImplementation(async (url, init) => {
+      if (String(url).includes("/rerank")) {
+        return new Response(JSON.stringify({
+          results: [
+            { index: 1, relevance_score: 0.99 },
+            { index: 0, relevance_score: 0.4 },
+          ],
+        }), { status: 200 });
+      }
+      // Non-rerank requests get embeddings: one deterministic 8-value vector per body.input entry.
+      const body = JSON.parse(String(init?.body ?? "{}")) as { input?: string[] };
+      const texts = Array.isArray(body.input) ? body.input : [];
+      const data = texts.map((text) => {
+        let seed = 0;
+        for (const ch of text) {
+          seed = (seed * 31 + ch.charCodeAt(0)) % 1000;
+        }
+        const embedding = Array.from({ length: 8 }, (_, idx) => ((seed + idx * 17) % 997) / 997);
+        return { embedding };
+      });
+
+      return new Response(JSON.stringify({
+        data,
+        usage: { total_tokens: Math.max(1, texts.length * 8) },
+      }), { status: 200 });
+    });
+    // Custom 8-dimension embedding provider, custom reranker enabled, RRF fusion.
+    const config = parseConfig({
+      embeddingProvider: "custom",
+      customProvider: {
+        baseUrl: "http://localhost:11434/v1",
+        model: "mock-embedding-model",
+        dimensions: 8,
+      },
+      reranker: {
+        enabled: true,
+        provider: "custom",
+        model: "mock-reranker",
+        baseUrl: "https://rerank.example/v1",
+        topN: 10,
+      },
+      indexing: {
+        watchFiles: false,
+      },
+      search: {
+        maxResults: 10,
+        minScore: 0,
+        fusionStrategy: "rrf",
+        rrfK: 60,
+        rerankTopN: 20,
+      },
+    });
+
+    const indexer = new Indexer(tempDir, config);
+    await indexer.index();
+
+    const results = await indexer.search("where is rankHybridResults documentation", 5, {
+      metadataOnly: true,
+      filterByBranch: false,
+    });
+    // The top result must be the README rather than the implementation file.
+    expect(results[0]?.filePath).toContain("/README.md");
+    expect(results[0]?.filePath).not.toContain("/app/indexer/index.ts");
+  });
 });

From 6fcfcf50966fd65d2372e4ca4b46d0d769057469 Mon Sep 17 00:00:00 2001
From: Helweg <kenneth@helweghansen.dk>
Date: Sat, 11 Apr 2026 21:25:10 +0200
Subject: [PATCH 05/20] feat: skip external reranking for exact definitions

---
 src/indexer/index.ts            | 18 +++++++++--
 tests/retrieval-ranking.test.ts | 55 +++++++++++++++++++++++++++++++++
 2 files changed, 70 insertions(+), 3 deletions(-)

diff --git a/src/indexer/index.ts b/src/indexer/index.ts
index 6294716..9b096fd 100644
--- a/src/indexer/index.ts
+++ b/src/indexer/index.ts
@@ -1493,13 +1493,21 @@ export class Indexer {
 
   private async rerankCandidatesWithApi(
     query: string,
-    candidates: RankedCandidate[]
+    candidates: RankedCandidate[],
+    options?: {
+      definitionIntent?: boolean;
+      hasIdentifierHints?: boolean;
+    }
   ): Promise<RankedCandidate[]> {
     const reranker = this.config.reranker;
     if (!reranker || !reranker.enabled || candidates.length <= 1) {
       return candidates;
     }
 
+    if (options?.definitionIntent === true || options?.hasIdentifierHints === true) {
+      return candidates;
+    }
+
     const queryTokens = Array.from(tokenizeTextForRanking(query));
     const preferSourcePaths = classifyQueryIntentRaw(query) === "source";
     const docIntent = classifyDocIntent(queryTokens) === "docs";
@@ -2620,6 +2628,7 @@ export class Indexer {
     const rerankTopN = this.config.search.rerankTopN;
     const filterByBranch = options?.filterByBranch ?? true;
     const sourceIntent = options?.definitionIntent === true || classifyQueryIntentRaw(query) === "source";
+    const identifierHints = extractIdentifierHints(query);
 
     this.logger.search("debug", "Starting search", {
       query,
@@ -2693,7 +2702,10 @@ export class Indexer {
       hybridWeight,
       prioritizeSourcePaths: sourceIntent,
     });
-    const rerankedCombined = await this.rerankCandidatesWithApi(query, combined);
+    const rerankedCombined = await this.rerankCandidatesWithApi(query, combined, {
+      definitionIntent: options?.definitionIntent === true,
+      hasIdentifierHints: identifierHints.length > 0,
+    });
     const fusionMs = performance.now() - fusionStartTime;
 
     const rescued = promoteIdentifierMatches(
@@ -2734,7 +2746,7 @@ export class Indexer {
     const prePrimaryLane = mergeTieredResults(deterministicIdentifierLane, identifierLane, maxResults * 4);
     const primaryLane = mergeTieredResults(prePrimaryLane, symbolLane, maxResults * 4);
     const tiered = mergeTieredResults(primaryLane, rescued, maxResults * 4);
-    const hasCodeHints = extractCodeTermHints(query).length > 0 || extractIdentifierHints(query).length > 0;
+    const hasCodeHints = extractCodeTermHints(query).length > 0 || identifierHints.length > 0;
 
     const baseFiltered = tiered.filter((r) => {
       if (r.score < this.config.search.minScore) return false;
diff --git a/tests/retrieval-ranking.test.ts b/tests/retrieval-ranking.test.ts
index c286aad..5bebc52 100644
--- a/tests/retrieval-ranking.test.ts
+++ b/tests/retrieval-ranking.test.ts
@@ -458,4 +458,59 @@ describe("retrieval ranking", () => {
     expect(reranked.map((candidate) => candidate.id)).toEqual(["first", "second", "third"]);
     globalThis.fetch = fetchSpy;
   });
+
+  it("skips external reranker for definition-intent queries with identifier hints", async () => {
+    const config = parseConfig({
+      embeddingProvider: "custom",
+      customProvider: {
+        baseUrl: "http://localhost:11434/v1",
+        model: "mock-embed",
+        dimensions: 8,
+      },
+      reranker: {
+        enabled: true,
+        provider: "custom",
+        model: "mock-reranker",
+        baseUrl: "https://rerank.example/v1",
+        topN: 3,
+      },
+    });
+    const indexer = new Indexer("/repo", config);
+    // Swap in a fetch stub that records whether the /rerank endpoint is requested.
+    const fetchSpy = globalThis.fetch;
+    let rerankCalled = false;
+    globalThis.fetch = (async (input) => {
+      if (String(input).includes("/rerank")) {
+        rerankCalled = true;
+        return new Response(JSON.stringify({
+          results: [
+            { index: 2, relevance_score: 0.99 },
+            { index: 0, relevance_score: 0.72 },
+            { index: 1, relevance_score: 0.4 },
+          ],
+        }), { status: 200 });
+      }
+      return new Response(JSON.stringify({ data: [{ embedding: Array.from({ length: 8 }, () => 0.1) }], usage: { total_tokens: 1 } }), { status: 200 });
+    }) as typeof fetch;
+
+    const candidates: Candidate[] = [
+      { id: "first", score: 0.9, metadata: meta({ filePath: "/repo/src/first.ts", name: "rankHybridResults", chunkType: "function", startLine: 1, endLine: 3 }) },
+      { id: "second", score: 0.89, metadata: meta({ filePath: "/repo/src/second.ts", name: "otherThing", chunkType: "function", startLine: 1, endLine: 3 }) },
+      { id: "third", score: 0.88, metadata: meta({ filePath: "/repo/README.md", name: "docs", chunkType: "other", startLine: 1, endLine: 3 }) },
+    ];
+
+    const reranked = await (indexer as unknown as {
+      rerankCandidatesWithApi(
+        query: string,
+        items: Candidate[],
+        options?: { definitionIntent?: boolean; hasIdentifierHints?: boolean }
+      ): Promise<Candidate[]>;
+    }).rerankCandidatesWithApi("where is rankHybridResults implementation", candidates, {
+      hasIdentifierHints: true,
+    });
+    // With hasIdentifierHints set, no rerank request is expected and the input order is kept.
+    expect(rerankCalled).toBe(false);
+    expect(reranked.map((candidate) => candidate.id)).toEqual(["first", "second", "third"]);
+    globalThis.fetch = fetchSpy;
+  });
 });

From eaac1e14c390b4c4f1e75ca6e14f18c93a181b48 Mon Sep 17 00:00:00 2001
From: Helweg <kenneth@helweghansen.dk>
Date: Sat, 11 Apr 2026 21:28:38 +0200
Subject: [PATCH 06/20] feat: diversify exploratory rerank results

---
 src/indexer/index.ts            | 41 ++++++++++++++++++++++++++++++++-
 tests/retrieval-ranking.test.ts | 22 ++++++++++++++++++
 2 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/src/indexer/index.ts b/src/indexer/index.ts
index 9b096fd..7266c90 100644
--- a/src/indexer/index.ts
+++ b/src/indexer/index.ts
@@ -887,8 +887,47 @@ export function rerankResults(
     });
   }
 
+  const shouldDiversify = !(preferSourcePaths && identifierHints.length > 0);
+  const diversifiedHead = shouldDiversify ? diversifyRerankedHead(head) : head;
+
   const tail = candidates.slice(topN);
-  return [...head.map((entry) => entry.candidate), ...tail];
+  return [...diversifiedHead.map((entry) => entry.candidate), ...tail];
+}
+
+function diversifyRerankedHead<T extends {
+  candidate: RankedCandidate;
+  originalIndex: number;
+}>(head: T[]): T[] {
+  if (head.length <= 2) {
+    return head;
+  }
+
+  const seenFiles = new Set<string>();
+  const firstPass: T[] = [];
+  const remainder: T[] = [];
+
+  for (const entry of head) {
+    const filePath = entry.candidate.metadata.filePath;
+    if (!seenFiles.has(filePath)) {
+      seenFiles.add(filePath);
+      firstPass.push(entry);
+    } else {
+      remainder.push(entry);
+    }
+  }
+
+  if (remainder.length === 0) {
+    return head;
+  }
+
+  return [...firstPass, ...remainder].sort((a, b) => {
+    const aPrimary = firstPass.includes(a) ? 1 : 0;
+    const bPrimary = firstPass.includes(b) ? 1 : 0;
+    if (aPrimary !== bPrimary) {
+      return bPrimary - aPrimary;
+    }
+    return a.originalIndex - b.originalIndex;
+  });
 }
 
 export function rankHybridResults(
diff --git a/tests/retrieval-ranking.test.ts b/tests/retrieval-ranking.test.ts
index 5bebc52..14e9cb7 100644
--- a/tests/retrieval-ranking.test.ts
+++ b/tests/retrieval-ranking.test.ts
@@ -106,6 +106,28 @@ describe("retrieval ranking", () => {
     expect(rerankedAgain.map(r => r.id)).toEqual(["exactName", "pathOverlap", "generic"]);
   });
 
+  it("diversifies exploratory queries to avoid same-file duplicates dominating top results", () => {
+    const candidates: Candidate[] = [
+      { id: "fileA-1", score: 0.96, metadata: meta({ filePath: "/repo/src/auth.ts", name: "validateAuth", chunkType: "function" }) },
+      { id: "fileA-2", score: 0.95, metadata: meta({ filePath: "/repo/src/auth.ts", name: "refreshAuth", chunkType: "function" }) },
+      { id: "fileB-1", score: 0.94, metadata: meta({ filePath: "/repo/src/session.ts", name: "loadSession", chunkType: "function" }) },
+    ];
+    // fileB-1 is expected ahead of fileA-2, the second chunk from auth.ts.
+    const reranked = rerankResults("auth flow", candidates, 10);
+    expect(reranked.map((candidate) => candidate.id).slice(0, 2)).toEqual(["fileA-1", "fileB-1"]);
+  });
+
+  it("does not diversify away exact-definition ranking for identifier queries", () => {
+    const candidates: Candidate[] = [
+      { id: "target", score: 0.96, metadata: meta({ filePath: "/repo/src/auth.ts", name: "rankHybridResults", chunkType: "function" }) },
+      { id: "same-file-secondary", score: 0.95, metadata: meta({ filePath: "/repo/src/auth.ts", name: "rankHybridResultsHelper", chunkType: "function" }) },
+      { id: "other-file", score: 0.94, metadata: meta({ filePath: "/repo/src/session.ts", name: "loadSession", chunkType: "function" }) },
+    ];
+    // The query names an identifier, so the same-file secondary chunk is expected to keep second place.
+    const reranked = rerankResults("where is rankHybridResults implementation", candidates, 10);
+    expect(reranked.map((candidate) => candidate.id).slice(0, 2)).toEqual(["target", "same-file-secondary"]);
+  });
+
   it("applies hybrid ranking path for search and semantic-only rerank for findSimilar", () => {
     const semantic: Candidate[] = [
       { id: "s1", score: 0.95, metadata: meta({ filePath: "/repo/src/auth.ts", name: "auth", chunkType: "function" }) },

From fd3213b06162c08196e5864384984a1eb80c7bee Mon Sep 17 00:00:00 2001
From: Helweg <kenneth@helweghansen.dk>
Date: Sat, 11 Apr 2026 21:35:53 +0200
Subject: [PATCH 07/20] feat: diversify external reranker output

---
 src/indexer/index.ts            | 28 +++++++++++++++--
 tests/retrieval-ranking.test.ts | 54 +++++++++++++++++++++++++++++++++
 2 files changed, 80 insertions(+), 2 deletions(-)

diff --git a/src/indexer/index.ts b/src/indexer/index.ts
index 7266c90..1550bcd 100644
--- a/src/indexer/index.ts
+++ b/src/indexer/index.ts
@@ -930,6 +930,28 @@ function diversifyRerankedHead<T extends {
   });
 }
 
+function diversifyCandidatesByFile(candidates: RankedCandidate[], enabled: boolean): RankedCandidate[] {
+  if (!enabled || candidates.length <= 2) {
+    return candidates;
+  }
+
+  const seenFiles = new Set<string>();
+  const primary: RankedCandidate[] = [];
+  const remainder: RankedCandidate[] = [];
+
+  for (const candidate of candidates) {
+    const filePath = candidate.metadata.filePath;
+    if (!seenFiles.has(filePath)) {
+      seenFiles.add(filePath);
+      primary.push(candidate);
+    } else {
+      remainder.push(candidate);
+    }
+  }
+
+  return [...primary, ...remainder];
+}
+
 export function rankHybridResults(
   query: string,
   semanticResults: RankedCandidate[],
@@ -1594,7 +1616,7 @@ export class Indexer {
         }
 
         const order = new Map(rankedIds.map((id, index) => [id, index]));
-        rerankedHead.push(...[...bandCandidates].sort((a, b) => {
+        const bandReranked = [...bandCandidates].sort((a, b) => {
           const aRank = order.get(a.id) ?? Number.MAX_SAFE_INTEGER;
           const bRank = order.get(b.id) ?? Number.MAX_SAFE_INTEGER;
           if (aRank !== bRank) {
@@ -1604,7 +1626,9 @@ export class Indexer {
             return b.score - a.score;
           }
           return a.id.localeCompare(b.id);
-        }));
+        });
+        const shouldDiversifyBand = !options?.hasIdentifierHints;
+        rerankedHead.push(...diversifyCandidatesByFile(bandReranked, shouldDiversifyBand));
       }
 
       this.logger.search("debug", "Applied external reranker", {
diff --git a/tests/retrieval-ranking.test.ts b/tests/retrieval-ranking.test.ts
index 14e9cb7..7a50717 100644
--- a/tests/retrieval-ranking.test.ts
+++ b/tests/retrieval-ranking.test.ts
@@ -535,4 +535,58 @@ describe("retrieval ranking", () => {
     expect(reranked.map((candidate) => candidate.id)).toEqual(["first", "second", "third"]);
     globalThis.fetch = fetchSpy;
   });
+
+  it("diversifies external reranker output for exploratory queries", async () => {
+    const config = parseConfig({
+      embeddingProvider: "custom",
+      customProvider: {
+        baseUrl: "http://localhost:11434/v1",
+        model: "mock-embed",
+        dimensions: 8,
+      },
+      reranker: {
+        enabled: true,
+        provider: "custom",
+        model: "mock-reranker",
+        baseUrl: "https://rerank.example/v1",
+        topN: 3,
+      },
+    });
+    const indexer = new Indexer("/repo", config);
+    // fileA1 and fileA2 are the same path, so the first two candidates share a file.
+    const fileA1 = createTempFile("src/auth.ts", "export function validateAuth() {\n  return true;\n}\n");
+    const fileA2 = fileA1;
+    const fileB = createTempFile("src/session.ts", "export function loadSession() {\n  return 'session';\n}\n");
+
+    const fetchSpy = globalThis.fetch;
+    globalThis.fetch = (async (input) => {
+      if (String(input).includes("/rerank")) {
+        return new Response(JSON.stringify({
+          results: [
+            { index: 0, relevance_score: 0.99 },
+            { index: 1, relevance_score: 0.98 },
+            { index: 2, relevance_score: 0.4 },
+          ],
+        }), { status: 200 });
+      }
+      return new Response(JSON.stringify({ data: [{ embedding: Array.from({ length: 8 }, () => 0.1) }], usage: { total_tokens: 1 } }), { status: 200 });
+    }) as typeof fetch;
+
+    const candidates: Candidate[] = [
+      { id: "fileA-1", score: 0.95, metadata: meta({ filePath: fileA1, name: "validateAuth", chunkType: "function", startLine: 1, endLine: 3 }) },
+      { id: "fileA-2", score: 0.94, metadata: meta({ filePath: fileA2, name: "refreshAuth", chunkType: "function", startLine: 1, endLine: 3 }) },
+      { id: "fileB", score: 0.93, metadata: meta({ filePath: fileB, name: "loadSession", chunkType: "function", startLine: 1, endLine: 3 }) },
+    ];
+
+    const reranked = await (indexer as unknown as {
+      rerankCandidatesWithApi(
+        query: string,
+        items: Candidate[],
+        options?: { definitionIntent?: boolean; hasIdentifierHints?: boolean }
+      ): Promise<Candidate[]>;
+    }).rerankCandidatesWithApi("auth flow", candidates);
+    // The mocked response lists index 1 ahead of index 2, yet fileB is expected in second place.
+    expect(reranked.map((candidate) => candidate.id).slice(0, 2)).toEqual(["fileA-1", "fileB"]);
+    globalThis.fetch = fetchSpy;
+  });
 });

From d59b4699361a50f46a8c253fb8eb9c1baf5e9577 Mon Sep 17 00:00:00 2001
From: Helweg <kenneth@helweghansen.dk>
Date: Sat, 11 Apr 2026 21:39:07 +0200
Subject: [PATCH 08/20] feat: suppress duplicate rerank results by symbol

---
 src/indexer/index.ts            | 101 ++++++++++++++++++++------------
 tests/retrieval-ranking.test.ts |  63 ++++++++++++++++++++
 2 files changed, 126 insertions(+), 38 deletions(-)

diff --git a/src/indexer/index.ts b/src/indexer/index.ts
index 1550bcd..29ea48f 100644
--- a/src/indexer/index.ts
+++ b/src/indexer/index.ts
@@ -888,70 +888,95 @@ export function rerankResults(
   }
 
   const shouldDiversify = !(preferSourcePaths && identifierHints.length > 0);
-  const diversifiedHead = shouldDiversify ? diversifyRerankedHead(head) : head;
+  const diversifiedHead = diversifyEntriesByFileAndSymbol(head, (entry) => entry.candidate, shouldDiversify);
 
   const tail = candidates.slice(topN);
   return [...diversifiedHead.map((entry) => entry.candidate), ...tail];
 }
 
-function diversifyRerankedHead<T extends {
-  candidate: RankedCandidate;
-  originalIndex: number;
-}>(head: T[]): T[] {
-  if (head.length <= 2) {
-    return head;
+function diversifyEntriesByFileAndSymbol<T>(
+  entries: T[],
+  getCandidate: (entry: T) => RankedCandidate,
+  enabled: boolean
+): T[] {
+  if (!enabled || entries.length <= 2) {
+    return entries;
   }
 
-  const seenFiles = new Set<string>();
-  const firstPass: T[] = [];
-  const remainder: T[] = [];
+  const groups = new Map<string, T[]>();
+  const groupOrder: string[] = [];
 
-  for (const entry of head) {
-    const filePath = entry.candidate.metadata.filePath;
-    if (!seenFiles.has(filePath)) {
-      seenFiles.add(filePath);
-      firstPass.push(entry);
-    } else {
-      remainder.push(entry);
+  for (const entry of entries) {
+    const candidate = getCandidate(entry);
+    const filePath = candidate.metadata.filePath;
+    if (!groups.has(filePath)) {
+      groups.set(filePath, []);
+      groupOrder.push(filePath);
     }
+    groups.get(filePath)?.push(entry);
   }
 
-  if (remainder.length === 0) {
-    return head;
-  }
+  const diversifiedGroups = groupOrder.map((filePath) => {
+    const group = groups.get(filePath) ?? [];
+    return diversifyGroupBySymbol(group, getCandidate);
+  });
 
-  return [...firstPass, ...remainder].sort((a, b) => {
-    const aPrimary = firstPass.includes(a) ? 1 : 0;
-    const bPrimary = firstPass.includes(b) ? 1 : 0;
-    if (aPrimary !== bPrimary) {
-      return bPrimary - aPrimary;
+  const result: T[] = [];
+  let added = true;
+  let round = 0;
+  while (added) {
+    added = false;
+    for (const group of diversifiedGroups) {
+      const entry = group[round];
+      if (entry !== undefined) {
+        result.push(entry);
+        added = true;
+      }
     }
-    return a.originalIndex - b.originalIndex;
-  });
+    round += 1;
+  }
+
+  return result;
 }
 
 function diversifyCandidatesByFile(candidates: RankedCandidate[], enabled: boolean): RankedCandidate[] {
-  if (!enabled || candidates.length <= 2) {
-    return candidates;
+  return diversifyEntriesByFileAndSymbol(candidates, (candidate) => candidate, enabled);
+}
+
+function diversifyGroupBySymbol<T>(
+  entries: T[],
+  getCandidate: (entry: T) => RankedCandidate
+): T[] {
+  if (entries.length <= 2) {
+    return entries;
   }
 
-  const seenFiles = new Set<string>();
-  const primary: RankedCandidate[] = [];
-  const remainder: RankedCandidate[] = [];
+  const seenKeys = new Set<string>();
+  const primary: T[] = [];
+  const remainder: T[] = [];
 
-  for (const candidate of candidates) {
-    const filePath = candidate.metadata.filePath;
-    if (!seenFiles.has(filePath)) {
-      seenFiles.add(filePath);
-      primary.push(candidate);
+  for (const entry of entries) {
+    const key = buildDiversityKey(getCandidate(entry).metadata);
+    if (!seenKeys.has(key)) {
+      seenKeys.add(key);
+      primary.push(entry);
     } else {
-      remainder.push(candidate);
+      remainder.push(entry);
     }
   }
 
   return [...primary, ...remainder];
 }
 
+/** Diversity key: lower-cased path, plus "#" and the trimmed lower-cased name when it is non-empty. */
+function buildDiversityKey(metadata: ChunkMetadata): string {
+  const pathKey = metadata.filePath.toLowerCase();
+  const symbol = (metadata.name ?? "").trim().toLowerCase();
+  // Chunks with a missing or blank name fall back to the bare path key.
+  if (symbol === "") return pathKey;
+  return `${pathKey}#${symbol}`;
+}
+
 export function rankHybridResults(
   query: string,
   semanticResults: RankedCandidate[],
diff --git a/tests/retrieval-ranking.test.ts b/tests/retrieval-ranking.test.ts
index 7a50717..82ab623 100644
--- a/tests/retrieval-ranking.test.ts
+++ b/tests/retrieval-ranking.test.ts
@@ -117,6 +117,17 @@ describe("retrieval ranking", () => {
     expect(reranked.map((candidate) => candidate.id).slice(0, 2)).toEqual(["fileA-1", "fileB-1"]);
   });
 
+  it("treats same-symbol duplicates as lower priority before distinct symbols", () => {
+    const candidates: Candidate[] = [
+      { id: "same-symbol-1", score: 0.96, metadata: meta({ filePath: "/repo/src/auth.ts", name: "validateAuth", chunkType: "function" }) },
+      { id: "same-symbol-2", score: 0.95, metadata: meta({ filePath: "/repo/src/auth.ts", name: "validateAuth", chunkType: "function" }) },
+      { id: "different-symbol", score: 0.94, metadata: meta({ filePath: "/repo/src/auth.ts", name: "refreshAuth", chunkType: "function" }) },
+    ];
+    // The repeated validateAuth chunk is expected to fall below the distinct refreshAuth chunk.
+    const reranked = rerankResults("auth flow", candidates, 10);
+    expect(reranked.map((candidate) => candidate.id).slice(0, 2)).toEqual(["same-symbol-1", "different-symbol"]);
+  });
+
   it("does not diversify away exact-definition ranking for identifier queries", () => {
     const candidates: Candidate[] = [
       { id: "target", score: 0.96, metadata: meta({ filePath: "/repo/src/auth.ts", name: "rankHybridResults", chunkType: "function" }) },
@@ -589,4 +600,56 @@ describe("retrieval ranking", () => {
     expect(reranked.map((candidate) => candidate.id).slice(0, 2)).toEqual(["fileA-1", "fileB"]);
     globalThis.fetch = fetchSpy;
   });
+
+  it("diversifies external reranker duplicates by symbol before repeating the same symbol", async () => {
+    const config = parseConfig({
+      embeddingProvider: "custom",
+      customProvider: {
+        baseUrl: "http://localhost:11434/v1",
+        model: "mock-embed",
+        dimensions: 8,
+      },
+      reranker: {
+        enabled: true,
+        provider: "custom",
+        model: "mock-reranker",
+        baseUrl: "https://rerank.example/v1",
+        topN: 3,
+      },
+    });
+    const indexer = new Indexer("/repo", config);
+
+    const authFile = createTempFile("src/auth.ts", "export function validateAuth() {\n  return true;\n}\nexport function refreshAuth() {\n  return false;\n}\n");
+
+    const fetchSpy = globalThis.fetch;
+    globalThis.fetch = (async (input) => {
+      if (String(input).includes("/rerank")) {
+        return new Response(JSON.stringify({
+          results: [
+            { index: 0, relevance_score: 0.99 },
+            { index: 1, relevance_score: 0.98 },
+            { index: 2, relevance_score: 0.4 },
+          ],
+        }), { status: 200 });
+      }
+      return new Response(JSON.stringify({ data: [{ embedding: Array.from({ length: 8 }, () => 0.1) }], usage: { total_tokens: 1 } }), { status: 200 });
+    }) as typeof fetch;
+
+    const candidates: Candidate[] = [
+      { id: "same-symbol-1", score: 0.95, metadata: meta({ filePath: authFile, name: "validateAuth", chunkType: "function", startLine: 1, endLine: 3 }) },
+      { id: "same-symbol-2", score: 0.94, metadata: meta({ filePath: authFile, name: "validateAuth", chunkType: "function", startLine: 1, endLine: 3 }) },
+      { id: "different-symbol", score: 0.93, metadata: meta({ filePath: authFile, name: "refreshAuth", chunkType: "function", startLine: 4, endLine: 6 }) },
+    ];
+
+    const reranked = await (indexer as unknown as {
+      rerankCandidatesWithApi(
+        query: string,
+        items: Candidate[],
+        options?: { definitionIntent?: boolean; hasIdentifierHints?: boolean }
+      ): Promise<Candidate[]>;
+    }).rerankCandidatesWithApi("auth flow", candidates);
+
+    expect(reranked.map((candidate) => candidate.id).slice(0, 2)).toEqual(["same-symbol-1", "different-symbol"]);
+    globalThis.fetch = fetchSpy;
+  });
 });

From c6c6bd5c60e3e6f0a375fb9051ef11a33deb2652 Mon Sep 17 00:00:00 2001
From: Helweg <kenneth@helweghansen.dk>
Date: Sat, 11 Apr 2026 21:43:19 +0200
Subject: [PATCH 09/20] test: add reranker diversity benchmark coverage

---
 benchmarks/baselines/retrieval-baseline.json |  1 +
 tests/retrieval-benchmark.test.ts            | 42 +++++++++++++++++++-
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/benchmarks/baselines/retrieval-baseline.json b/benchmarks/baselines/retrieval-baseline.json
index 726d8df..2e83428 100644
--- a/benchmarks/baselines/retrieval-baseline.json
+++ b/benchmarks/baselines/retrieval-baseline.json
@@ -2,6 +2,7 @@
   "generatedAt": "2026-03-13T14:21:43.213Z",
   "queryCount": 3,
   "hitAt5": 1,
+  "distinctTop3Ratio": 0.9166666666666666,
   "medianMs": 0.010916000000008808,
   "p95Ms": 0.024666000000024724
 }
diff --git a/tests/retrieval-benchmark.test.ts b/tests/retrieval-benchmark.test.ts
index 4a9ef6a..45fc2c4 100644
--- a/tests/retrieval-benchmark.test.ts
+++ b/tests/retrieval-benchmark.test.ts
@@ -23,6 +23,7 @@ interface BenchmarkArtifact {
   generatedAt: string;
   queryCount: number;
   hitAt5: number;
+  distinctTop3Ratio: number;
   medianMs: number;
   p95Ms: number;
 }
@@ -80,6 +81,26 @@ function computeHitAt5(queries: BenchmarkQuery[]): number {
   return queries.length === 0 ? 0 : hits / queries.length;
 }
 
+/** Mean over all queries of the distinct-file-path share within the top three hybrid-ranked results. */
+function computeDistinctTop3Ratio(queries: BenchmarkQuery[]): number {
+  if (queries.length === 0) return 0;
+
+  const perQueryRatios = queries.map((benchmarkQuery) => {
+    const head = rankHybridResults(benchmarkQuery.query, benchmarkQuery.semantic, benchmarkQuery.keyword, {
+      fusionStrategy: "rrf",
+      rrfK: 60,
+      rerankTopN: 20,
+      limit: 10,
+      hybridWeight: 0.5,
+    }).slice(0, 3);
+    const uniquePaths = new Set(head.map((hit) => hit.metadata.filePath));
+    // Math.max keeps the ratio at 0 (not NaN) when a query returns nothing.
+    return uniquePaths.size / Math.max(1, head.length);
+  });
+
+  return perQueryRatios.reduce((sum, ratio) => sum + ratio, 0) / queries.length;
+}
+
 function runLatency(queries: BenchmarkQuery[]): { medianMs: number; p95Ms: number } {
   const allSamples: LatencySample[] = [];
   const batchP95: number[] = [];
@@ -187,16 +208,18 @@ function loadBaseline(): BenchmarkArtifact {
   const parsed = JSON.parse(raw) as Partial<BenchmarkArtifact>;
   if (
     typeof parsed.hitAt5 !== "number" ||
+    typeof parsed.distinctTop3Ratio !== "number" ||
     typeof parsed.medianMs !== "number" ||
     typeof parsed.p95Ms !== "number"
   ) {
-    throw new Error("retrieval-baseline.json is invalid: expected numeric hitAt5, medianMs, and p95Ms");
+    throw new Error("retrieval-baseline.json is invalid: expected numeric hitAt5, distinctTop3Ratio, medianMs, and p95Ms");
   }
 
   return {
     generatedAt: typeof parsed.generatedAt === "string" ? parsed.generatedAt : new Date(0).toISOString(),
     queryCount: typeof parsed.queryCount === "number" ? parsed.queryCount : 0,
     hitAt5: parsed.hitAt5,
+    distinctTop3Ratio: parsed.distinctTop3Ratio,
     medianMs: parsed.medianMs,
     p95Ms: parsed.p95Ms,
   };
@@ -242,15 +265,31 @@ describe("retrieval benchmark", () => {
           { id: "k-doc", score: 10, metadata: meta("/repo/README.md", "find similar", "other") },
         ],
       },
+      {
+        query: "auth flow exploration",
+        expectedTop5: ["/repo/src/auth.ts", "/repo/src/session.ts"],
+        semantic: [
+          { id: "s-auth-1", score: 0.96, metadata: meta("/repo/src/auth.ts", "validateAuth") },
+          { id: "s-auth-2", score: 0.95, metadata: meta("/repo/src/auth.ts", "refreshAuth") },
+          { id: "s-session", score: 0.94, metadata: meta("/repo/src/session.ts", "loadSession") },
+        ],
+        keyword: [
+          { id: "s-auth-1", score: 25, metadata: meta("/repo/src/auth.ts", "validateAuth") },
+          { id: "s-auth-2", score: 24, metadata: meta("/repo/src/auth.ts", "refreshAuth") },
+          { id: "s-session", score: 10, metadata: meta("/repo/src/session.ts", "loadSession") },
+        ],
+      },
     ];
 
     const hitAt5 = computeHitAt5(queries);
+    const distinctTop3Ratio = computeDistinctTop3Ratio(queries);
     const latency = runLatency(queries);
 
     const candidate: BenchmarkArtifact = {
       generatedAt: new Date().toISOString(),
       queryCount: queries.length,
       hitAt5,
+      distinctTop3Ratio,
       medianMs: latency.medianMs,
       p95Ms: latency.p95Ms,
     };
@@ -261,6 +300,7 @@ describe("retrieval benchmark", () => {
     const baseline = loadBaseline();
 
     expect(candidate.hitAt5).toBeGreaterThanOrEqual(baseline.hitAt5);
+    expect(candidate.distinctTop3Ratio).toBeGreaterThanOrEqual(baseline.distinctTop3Ratio);
     const medianBudget = Math.max(
       baseline.medianMs * 1.15 + LATENCY_BUDGET_ABSOLUTE_JITTER_MS,
       LATENCY_BUDGET_MEDIAN_MIN_MS

From b9409afba0f75bf77e32eee6628d24c22d1dd2f7 Mon Sep 17 00:00:00 2001
From: Helweg <kenneth@helweghansen.dk>
Date: Sat, 11 Apr 2026 21:49:08 +0200
Subject: [PATCH 10/20] feat: track eval distinct top-k diversity

---
 src/eval/compare.ts |  1 +
 src/eval/metrics.ts | 10 ++++++++++
 src/eval/reports.ts |  4 ++++
 src/eval/types.ts   |  2 ++
 4 files changed, 17 insertions(+)

diff --git a/src/eval/compare.ts b/src/eval/compare.ts
index 989fc0f..6c5f97d 100644
--- a/src/eval/compare.ts
+++ b/src/eval/compare.ts
@@ -21,6 +21,7 @@ export function compareSummaries(current: EvalSummary, baseline: EvalSummary, ag
       hitAt10: metricDelta(current.metrics.hitAt10, baseline.metrics.hitAt10),
       mrrAt10: metricDelta(current.metrics.mrrAt10, baseline.metrics.mrrAt10),
       ndcgAt10: metricDelta(current.metrics.ndcgAt10, baseline.metrics.ndcgAt10),
+      distinctTop3Ratio: metricDelta(current.metrics.distinctTop3Ratio, baseline.metrics.distinctTop3Ratio),
       latencyP50Ms: metricDelta(current.metrics.latencyMs.p50, baseline.metrics.latencyMs.p50),
       latencyP95Ms: metricDelta(current.metrics.latencyMs.p95, baseline.metrics.latencyMs.p95),
       latencyP99Ms: metricDelta(current.metrics.latencyMs.p99, baseline.metrics.latencyMs.p99),
diff --git a/src/eval/metrics.ts b/src/eval/metrics.ts
index c4ea8bf..c6c52a3 100644
--- a/src/eval/metrics.ts
+++ b/src/eval/metrics.ts
@@ -39,6 +39,13 @@ function uniqueResultsByPath(results: PerQueryEvalResult["results"]): PerQueryEv
   return unique;
 }
 
+/** Distinct normalized file paths among the first `k` results, divided by how many were examined; 0 if none. */
+function distinctTopKRatio(results: PerQueryEvalResult["results"], k: number): number {
+  const head = results.slice(0, k);
+  const uniquePaths = new Set(head.map((entry) => normalizePath(entry.filePath)));
+  return head.length === 0 ? 0 : uniquePaths.size / head.length;
+}
+
 export function pathMatchesExpected(actualPath: string, expectedPath: string): boolean {
   const actual = normalizePath(actualPath);
   const expected = normalizePath(expectedPath);
@@ -172,6 +179,7 @@ export function computeEvalMetrics(
     hitAt10: 0,
     mrrAt10: 0,
     ndcgAt10: 0,
+    distinctTop3Ratio: 0,
   };
 
   const failureBuckets: Record<FailureBucket, number> = {
@@ -190,6 +198,7 @@ export function computeEvalMetrics(
     if (query.hitAt10) sum.hitAt10 += 1;
     sum.mrrAt10 += query.reciprocalRankAt10;
     sum.ndcgAt10 += query.ndcgAt10;
+    sum.distinctTop3Ratio += distinctTopKRatio(query.results, 3);
     if (query.failureBucket) {
       failureBuckets[query.failureBucket] += 1;
     }
@@ -204,6 +213,7 @@ export function computeEvalMetrics(
     hitAt10: safeDiv(sum.hitAt10),
     mrrAt10: safeDiv(sum.mrrAt10),
     ndcgAt10: safeDiv(sum.ndcgAt10),
+    distinctTop3Ratio: safeDiv(sum.distinctTop3Ratio),
     latencyMs: {
       p50: percentile(latencies, 0.5),
       p95: percentile(latencies, 0.95),
diff --git a/src/eval/reports.ts b/src/eval/reports.ts
index a0b1a9f..3bc6807 100644
--- a/src/eval/reports.ts
+++ b/src/eval/reports.ts
@@ -74,6 +74,7 @@ export function createSummaryMarkdown(
   lines.push(`| Hit@10 | ${formatPct(summary.metrics.hitAt10)} |`);
   lines.push(`| MRR@10 | ${summary.metrics.mrrAt10.toFixed(4)} |`);
   lines.push(`| nDCG@10 | ${summary.metrics.ndcgAt10.toFixed(4)} |`);
+  lines.push(`| Distinct Top@3 | ${formatPct(summary.metrics.distinctTop3Ratio)} |`);
   lines.push(`| Latency p50 | ${formatMs(summary.metrics.latencyMs.p50)} |`);
   lines.push(`| Latency p95 | ${formatMs(summary.metrics.latencyMs.p95)} |`);
   lines.push(`| Latency p99 | ${formatMs(summary.metrics.latencyMs.p99)} |`);
@@ -116,6 +117,9 @@ export function createSummaryMarkdown(
     lines.push(
       `| nDCG@10 | ${comparison.deltas.ndcgAt10.baseline.toFixed(4)} | ${comparison.deltas.ndcgAt10.current.toFixed(4)} | ${signed(comparison.deltas.ndcgAt10.absolute)} |`
     );
+    lines.push(
+      `| Distinct Top@3 | ${formatPct(comparison.deltas.distinctTop3Ratio.baseline)} | ${formatPct(comparison.deltas.distinctTop3Ratio.current)} | ${signed(comparison.deltas.distinctTop3Ratio.absolute)} |`
+    );
     lines.push(
       `| p95 latency (ms) | ${comparison.deltas.latencyP95Ms.baseline.toFixed(3)} | ${comparison.deltas.latencyP95Ms.current.toFixed(3)} | ${signed(comparison.deltas.latencyP95Ms.absolute, 3)} |`
     );
diff --git a/src/eval/types.ts b/src/eval/types.ts
index 30b51ef..7b28cf0 100644
--- a/src/eval/types.ts
+++ b/src/eval/types.ts
@@ -78,6 +78,7 @@ export interface EvalMetrics {
   hitAt10: number;
   mrrAt10: number;
   ndcgAt10: number;
+  distinctTop3Ratio: number;
   latencyMs: {
     p50: number;
     p95: number;
@@ -123,6 +124,7 @@ export interface EvalComparison {
     hitAt10: MetricDelta;
     mrrAt10: MetricDelta;
     ndcgAt10: MetricDelta;
+    distinctTop3Ratio: MetricDelta;
     latencyP50Ms: MetricDelta;
     latencyP95Ms: MetricDelta;
     latencyP99Ms: MetricDelta;

From dc49b1a85c1b91c5b81a907f26eb4df62ad157ac Mon Sep 17 00:00:00 2001
From: Helweg <kenneth@helweghansen.dk>
Date: Sat, 11 Apr 2026 21:49:19 +0200
Subject: [PATCH 11/20] test: cover eval distinct top-k diversity

---
 tests/eval-metrics.test.ts | 41 ++++++++++++++++++++++++++++++++++++++
 tests/eval-runner.test.ts  |  3 +++
 2 files changed, 44 insertions(+)

diff --git a/tests/eval-metrics.test.ts b/tests/eval-metrics.test.ts
index 948d954..32b7413 100644
--- a/tests/eval-metrics.test.ts
+++ b/tests/eval-metrics.test.ts
@@ -160,11 +160,52 @@ describe("eval metrics", () => {
     expect(metrics.hitAt1).toBe(0.5);
     expect(metrics.hitAt3).toBe(1);
     expect(metrics.mrrAt10).toBeCloseTo(0.75, 5);
+    expect(metrics.distinctTop3Ratio).toBe(1);
     expect(metrics.latencyMs.p50).toBeGreaterThan(0);
     expect(metrics.embedding.callCount).toBe(20);
     expect(metrics.embedding.estimatedCostUsd).toBeCloseTo(0.00002, 8);
   });
 
+  it("tracks distinctTop3Ratio on per-query eval output", () => {
+    const queries: GoldenQuery[] = [query({ id: "q-dup" })];
+    const perQuery = [
+      buildPerQueryResult(
+        queries[0],
+        [
+          {
+            filePath: "/repo/src/indexer/index.ts",
+            startLine: 1,
+            endLine: 2,
+            score: 1,
+            chunkType: "function",
+            name: "rankHybridResults",
+          },
+          {
+            filePath: "/repo/src/indexer/index.ts",
+            startLine: 10,
+            endLine: 20,
+            score: 0.95,
+            chunkType: "function",
+            name: "rerankResults",
+          },
+          {
+            filePath: "/repo/src/tools/index.ts",
+            startLine: 1,
+            endLine: 2,
+            score: 0.9,
+            chunkType: "function",
+            name: "codebase_search",
+          },
+        ],
+        10,
+        10
+      ),
+    ];
+
+    const metrics = computeEvalMetrics(queries, perQuery, 0, 0, 0);
+    expect(metrics.distinctTop3Ratio).toBe(1);
+  });
+
   it("uses deterministic percentile behavior for tiny samples", () => {
     const q = query();
     const build = (id: string, latencyMs: number) =>
diff --git a/tests/eval-runner.test.ts b/tests/eval-runner.test.ts
index 60a7d4e..e1739ef 100644
--- a/tests/eval-runner.test.ts
+++ b/tests/eval-runner.test.ts
@@ -121,7 +121,9 @@ describe("eval runner", () => {
     });
 
     expect(result.summary.queryCount).toBe(1);
+    expect(typeof result.summary.metrics.distinctTop3Ratio).toBe("number");
     expect(readFileSync(path.join(result.outputDir, "summary.json"), "utf-8")).toContain("\"metrics\"");
+    expect(readFileSync(path.join(result.outputDir, "summary.md"), "utf-8")).toContain("Distinct Top@3");
     expect(readFileSync(path.join(result.outputDir, "summary.md"), "utf-8")).toContain("# Evaluation Summary");
     expect(readFileSync(path.join(result.outputDir, "per-query.json"), "utf-8")).toContain("\"queries\"");
   });
@@ -152,6 +154,7 @@ describe("eval runner", () => {
     });
 
     expect(compareRun.comparison).toBeDefined();
+    expect(readFileSync(path.join(compareRun.outputDir, "compare.json"), "utf-8")).toContain("\"distinctTop3Ratio\"");
     expect(readFileSync(path.join(compareRun.outputDir, "compare.json"), "utf-8")).toContain("\"deltas\"");
   });
 

From ad0b70f822a5fcbbbdd68c086b8f39263c068972 Mon Sep 17 00:00:00 2001
From: Helweg <kenneth@helweghansen.dk>
Date: Sat, 11 Apr 2026 21:54:14 +0200
Subject: [PATCH 12/20] feat: track raw eval top-k diversity

---
 src/eval/compare.ts | 1 +
 src/eval/metrics.ts | 4 ++++
 src/eval/reports.ts | 4 ++++
 src/eval/types.ts   | 3 +++
 4 files changed, 12 insertions(+)

diff --git a/src/eval/compare.ts b/src/eval/compare.ts
index 6c5f97d..c648f3e 100644
--- a/src/eval/compare.ts
+++ b/src/eval/compare.ts
@@ -22,6 +22,7 @@ export function compareSummaries(current: EvalSummary, baseline: EvalSummary, ag
       mrrAt10: metricDelta(current.metrics.mrrAt10, baseline.metrics.mrrAt10),
       ndcgAt10: metricDelta(current.metrics.ndcgAt10, baseline.metrics.ndcgAt10),
       distinctTop3Ratio: metricDelta(current.metrics.distinctTop3Ratio, baseline.metrics.distinctTop3Ratio),
+      rawDistinctTop3Ratio: metricDelta(current.metrics.rawDistinctTop3Ratio, baseline.metrics.rawDistinctTop3Ratio),
       latencyP50Ms: metricDelta(current.metrics.latencyMs.p50, baseline.metrics.latencyMs.p50),
       latencyP95Ms: metricDelta(current.metrics.latencyMs.p95, baseline.metrics.latencyMs.p95),
       latencyP99Ms: metricDelta(current.metrics.latencyMs.p99, baseline.metrics.latencyMs.p99),
diff --git a/src/eval/metrics.ts b/src/eval/metrics.ts
index c6c52a3..3693d64 100644
--- a/src/eval/metrics.ts
+++ b/src/eval/metrics.ts
@@ -156,6 +156,7 @@ export function buildPerQueryResult(
     reciprocalRankAt10: reciprocalRankAtK(deduped, relevantPaths, 10),
     ndcgAt10: ndcgAtK(deduped, relevantPaths, 10),
     failureBucket: classifyFailureBucket(query, results, k),
+    rawTop3DistinctRatio: distinctTopKRatio(results, 3),
     results: deduped,
   };
 
@@ -180,6 +181,7 @@ export function computeEvalMetrics(
     mrrAt10: 0,
     ndcgAt10: 0,
     distinctTop3Ratio: 0,
+    rawDistinctTop3Ratio: 0,
   };
 
   const failureBuckets: Record<FailureBucket, number> = {
@@ -199,6 +201,7 @@ export function computeEvalMetrics(
     sum.mrrAt10 += query.reciprocalRankAt10;
     sum.ndcgAt10 += query.ndcgAt10;
     sum.distinctTop3Ratio += distinctTopKRatio(query.results, 3);
+    sum.rawDistinctTop3Ratio += query.rawTop3DistinctRatio;
     if (query.failureBucket) {
       failureBuckets[query.failureBucket] += 1;
     }
@@ -214,6 +217,7 @@ export function computeEvalMetrics(
     mrrAt10: safeDiv(sum.mrrAt10),
     ndcgAt10: safeDiv(sum.ndcgAt10),
     distinctTop3Ratio: safeDiv(sum.distinctTop3Ratio),
+    rawDistinctTop3Ratio: safeDiv(sum.rawDistinctTop3Ratio),
     latencyMs: {
       p50: percentile(latencies, 0.5),
       p95: percentile(latencies, 0.95),
diff --git a/src/eval/reports.ts b/src/eval/reports.ts
index 3bc6807..f55c21c 100644
--- a/src/eval/reports.ts
+++ b/src/eval/reports.ts
@@ -75,6 +75,7 @@ export function createSummaryMarkdown(
   lines.push(`| MRR@10 | ${summary.metrics.mrrAt10.toFixed(4)} |`);
   lines.push(`| nDCG@10 | ${summary.metrics.ndcgAt10.toFixed(4)} |`);
   lines.push(`| Distinct Top@3 | ${formatPct(summary.metrics.distinctTop3Ratio)} |`);
+  lines.push(`| Raw Distinct Top@3 | ${formatPct(summary.metrics.rawDistinctTop3Ratio)} |`);
   lines.push(`| Latency p50 | ${formatMs(summary.metrics.latencyMs.p50)} |`);
   lines.push(`| Latency p95 | ${formatMs(summary.metrics.latencyMs.p95)} |`);
   lines.push(`| Latency p99 | ${formatMs(summary.metrics.latencyMs.p99)} |`);
@@ -120,6 +121,9 @@ export function createSummaryMarkdown(
     lines.push(
       `| Distinct Top@3 | ${formatPct(comparison.deltas.distinctTop3Ratio.baseline)} | ${formatPct(comparison.deltas.distinctTop3Ratio.current)} | ${signed(comparison.deltas.distinctTop3Ratio.absolute)} |`
     );
+    lines.push(
+      `| Raw Distinct Top@3 | ${formatPct(comparison.deltas.rawDistinctTop3Ratio.baseline)} | ${formatPct(comparison.deltas.rawDistinctTop3Ratio.current)} | ${signed(comparison.deltas.rawDistinctTop3Ratio.absolute)} |`
+    );
     lines.push(
       `| p95 latency (ms) | ${comparison.deltas.latencyP95Ms.baseline.toFixed(3)} | ${comparison.deltas.latencyP95Ms.current.toFixed(3)} | ${signed(comparison.deltas.latencyP95Ms.absolute, 3)} |`
     );
diff --git a/src/eval/types.ts b/src/eval/types.ts
index 7b28cf0..72ae3f6 100644
--- a/src/eval/types.ts
+++ b/src/eval/types.ts
@@ -68,6 +68,7 @@ export interface PerQueryEvalResult {
   reciprocalRankAt10: number;
   ndcgAt10: number;
   failureBucket?: FailureBucket;
+  rawTop3DistinctRatio: number;
   results: EvalSearchResult[];
 }
 
@@ -79,6 +80,7 @@ export interface EvalMetrics {
   mrrAt10: number;
   ndcgAt10: number;
   distinctTop3Ratio: number;
+  rawDistinctTop3Ratio: number;
   latencyMs: {
     p50: number;
     p95: number;
@@ -125,6 +127,7 @@ export interface EvalComparison {
     mrrAt10: MetricDelta;
     ndcgAt10: MetricDelta;
     distinctTop3Ratio: MetricDelta;
+    rawDistinctTop3Ratio: MetricDelta;
     latencyP50Ms: MetricDelta;
     latencyP95Ms: MetricDelta;
     latencyP99Ms: MetricDelta;

From dba17fb8dc3015012a29033b2127c858937990df Mon Sep 17 00:00:00 2001
From: Helweg <kenneth@helweghansen.dk>
Date: Sat, 11 Apr 2026 21:54:26 +0200
Subject: [PATCH 13/20] test: cover raw eval top-k diversity

---
 tests/eval-metrics.test.ts | 5 ++++-
 tests/eval-runner.test.ts  | 3 +++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/tests/eval-metrics.test.ts b/tests/eval-metrics.test.ts
index 32b7413..f500c96 100644
--- a/tests/eval-metrics.test.ts
+++ b/tests/eval-metrics.test.ts
@@ -161,12 +161,13 @@ describe("eval metrics", () => {
     expect(metrics.hitAt3).toBe(1);
     expect(metrics.mrrAt10).toBeCloseTo(0.75, 5);
     expect(metrics.distinctTop3Ratio).toBe(1);
+    expect(metrics.rawDistinctTop3Ratio).toBe(1);
     expect(metrics.latencyMs.p50).toBeGreaterThan(0);
     expect(metrics.embedding.callCount).toBe(20);
     expect(metrics.embedding.estimatedCostUsd).toBeCloseTo(0.00002, 8);
   });
 
-  it("tracks distinctTop3Ratio on per-query eval output", () => {
+  it("tracks deduped and raw distinctTop3 ratios separately", () => {
     const queries: GoldenQuery[] = [query({ id: "q-dup" })];
     const perQuery = [
       buildPerQueryResult(
@@ -204,6 +205,8 @@ describe("eval metrics", () => {
 
     const metrics = computeEvalMetrics(queries, perQuery, 0, 0, 0);
     expect(metrics.distinctTop3Ratio).toBe(1);
+    expect(metrics.rawDistinctTop3Ratio).toBeCloseTo(2 / 3, 6);
+    expect(perQuery[0].rawTop3DistinctRatio).toBeCloseTo(2 / 3, 6);
   });
 
   it("uses deterministic percentile behavior for tiny samples", () => {
diff --git a/tests/eval-runner.test.ts b/tests/eval-runner.test.ts
index e1739ef..558201b 100644
--- a/tests/eval-runner.test.ts
+++ b/tests/eval-runner.test.ts
@@ -122,8 +122,10 @@ describe("eval runner", () => {
 
     expect(result.summary.queryCount).toBe(1);
     expect(typeof result.summary.metrics.distinctTop3Ratio).toBe("number");
+    expect(typeof result.summary.metrics.rawDistinctTop3Ratio).toBe("number");
     expect(readFileSync(path.join(result.outputDir, "summary.json"), "utf-8")).toContain("\"metrics\"");
     expect(readFileSync(path.join(result.outputDir, "summary.md"), "utf-8")).toContain("Distinct Top@3");
+    expect(readFileSync(path.join(result.outputDir, "summary.md"), "utf-8")).toContain("Raw Distinct Top@3");
     expect(readFileSync(path.join(result.outputDir, "summary.md"), "utf-8")).toContain("# Evaluation Summary");
     expect(readFileSync(path.join(result.outputDir, "per-query.json"), "utf-8")).toContain("\"queries\"");
   });
@@ -155,6 +157,7 @@ describe("eval runner", () => {
 
     expect(compareRun.comparison).toBeDefined();
     expect(readFileSync(path.join(compareRun.outputDir, "compare.json"), "utf-8")).toContain("\"distinctTop3Ratio\"");
+    expect(readFileSync(path.join(compareRun.outputDir, "compare.json"), "utf-8")).toContain("\"rawDistinctTop3Ratio\"");
     expect(readFileSync(path.join(compareRun.outputDir, "compare.json"), "utf-8")).toContain("\"deltas\"");
   });
 

From 481b29b20c8fdcfe592f092e35fb87139208cd2e Mon Sep 17 00:00:00 2001
From: Helweg <kenneth@helweghansen.dk>
Date: Sat, 11 Apr 2026 21:59:31 +0200
Subject: [PATCH 14/20] feat: gate eval raw top-k diversity

---
 src/eval/budget.ts | 20 ++++++++++++++++++++
 src/eval/schema.ts | 14 ++++++++++++++
 src/eval/types.ts  |  2 ++
 3 files changed, 36 insertions(+)

diff --git a/src/eval/budget.ts b/src/eval/budget.ts
index 6a61b60..d558cd9 100644
--- a/src/eval/budget.ts
+++ b/src/eval/budget.ts
@@ -24,6 +24,16 @@ export function evaluateBudgetGate(
     });
   }
 
+  if (
+    thresholds.minRawDistinctTop3Ratio !== undefined &&
+    summary.metrics.rawDistinctTop3Ratio < thresholds.minRawDistinctTop3Ratio
+  ) {
+    violations.push({
+      metric: "minRawDistinctTop3Ratio",
+      message: `Raw Distinct Top@3 ${summary.metrics.rawDistinctTop3Ratio.toFixed(4)} is below minimum ${thresholds.minRawDistinctTop3Ratio.toFixed(4)}`,
+    });
+  }
+
   if (comparison) {
     if (
       thresholds.hitAt5MaxDrop !== undefined &&
@@ -45,6 +55,16 @@ export function evaluateBudgetGate(
       });
     }
 
+    if (
+      thresholds.rawDistinctTop3RatioMaxDrop !== undefined &&
+      comparison.deltas.rawDistinctTop3Ratio.absolute < -thresholds.rawDistinctTop3RatioMaxDrop
+    ) {
+      violations.push({
+        metric: "rawDistinctTop3RatioMaxDrop",
+        message: `Raw Distinct Top@3 drop ${comparison.deltas.rawDistinctTop3Ratio.absolute.toFixed(4)} exceeds allowed -${thresholds.rawDistinctTop3RatioMaxDrop.toFixed(4)}`,
+      });
+    }
+
     if (thresholds.p95LatencyMaxMultiplier !== undefined) {
       const baselineP95 = comparison.deltas.latencyP95Ms.baseline;
       if (baselineP95 > BASELINE_P95_EPSILON_MS) {
diff --git a/src/eval/schema.ts b/src/eval/schema.ts
index 4293989..9bc0d67 100644
--- a/src/eval/schema.ts
+++ b/src/eval/schema.ts
@@ -195,6 +195,13 @@ export function parseBudget(raw: unknown, sourceLabel: string): EvalBudget {
         thresholds.mrrAt10MaxDrop === undefined
           ? undefined
           : asPositiveNumber(thresholds.mrrAt10MaxDrop, `${sourceLabel}.thresholds.mrrAt10MaxDrop`),
+      rawDistinctTop3RatioMaxDrop:
+        thresholds.rawDistinctTop3RatioMaxDrop === undefined
+          ? undefined
+          : asPositiveNumber(
+              thresholds.rawDistinctTop3RatioMaxDrop,
+              `${sourceLabel}.thresholds.rawDistinctTop3RatioMaxDrop`
+            ),
       p95LatencyMaxMultiplier:
         thresholds.p95LatencyMaxMultiplier === undefined
           ? undefined
@@ -217,6 +224,13 @@ export function parseBudget(raw: unknown, sourceLabel: string): EvalBudget {
         thresholds.minMrrAt10 === undefined
           ? undefined
           : asPositiveNumber(thresholds.minMrrAt10, `${sourceLabel}.thresholds.minMrrAt10`),
+      minRawDistinctTop3Ratio:
+        thresholds.minRawDistinctTop3Ratio === undefined
+          ? undefined
+          : asPositiveNumber(
+              thresholds.minRawDistinctTop3Ratio,
+              `${sourceLabel}.thresholds.minRawDistinctTop3Ratio`
+            ),
     },
   };
 }
diff --git a/src/eval/types.ts b/src/eval/types.ts
index 72ae3f6..43b5fde 100644
--- a/src/eval/types.ts
+++ b/src/eval/types.ts
@@ -34,10 +34,12 @@ export interface EvalBudget {
   thresholds: {
     hitAt5MaxDrop?: number;
     mrrAt10MaxDrop?: number;
+    rawDistinctTop3RatioMaxDrop?: number;
     p95LatencyMaxMultiplier?: number;
     p95LatencyMaxAbsoluteMs?: number;
     minHitAt5?: number;
     minMrrAt10?: number;
+    minRawDistinctTop3Ratio?: number;
   };
 }
 

From 44caf10c95453d3c47d1cf8939651c72473fcf45 Mon Sep 17 00:00:00 2001
From: Helweg <kenneth@helweghansen.dk>
Date: Sat, 11 Apr 2026 21:59:44 +0200
Subject: [PATCH 15/20] test: cover raw diversity budget gating

---
 tests/eval-budget.test.ts | 64 +++++++++++++++++++++++++++++++++++++++
 tests/eval-schema.test.ts |  6 +++-
 2 files changed, 69 insertions(+), 1 deletion(-)

diff --git a/tests/eval-budget.test.ts b/tests/eval-budget.test.ts
index d4b8c8c..f77b81f 100644
--- a/tests/eval-budget.test.ts
+++ b/tests/eval-budget.test.ts
@@ -25,6 +25,8 @@ function summary(p95: number): EvalSummary {
       hitAt10: 1,
       mrrAt10: 1,
       ndcgAt10: 1,
+      distinctTop3Ratio: 1,
+      rawDistinctTop3Ratio: 1,
       latencyMs: {
         p50: p95,
         p95,
@@ -59,6 +61,8 @@ function comparisonWithBaselineP95(baselineP95: number): EvalComparison {
       hitAt10: { current: 1, baseline: 1, absolute: 0, relativePct: 0 },
       mrrAt10: { current: 1, baseline: 1, absolute: 0, relativePct: 0 },
       ndcgAt10: { current: 1, baseline: 1, absolute: 0, relativePct: 0 },
+      distinctTop3Ratio: { current: 1, baseline: 1, absolute: 0, relativePct: 0 },
+      rawDistinctTop3Ratio: { current: 1, baseline: 1, absolute: 0, relativePct: 0 },
       latencyP50Ms: { current: 5, baseline: baselineP95, absolute: 5 - baselineP95, relativePct: 0 },
       latencyP95Ms: { current: 5, baseline: baselineP95, absolute: 5 - baselineP95, relativePct: 0 },
       latencyP99Ms: { current: 5, baseline: baselineP95, absolute: 5 - baselineP95, relativePct: 0 },
@@ -97,4 +101,64 @@ describe("eval budget gate", () => {
     expect(gate.passed).toBe(false);
     expect(gate.violations.some((v) => v.metric === "p95LatencyMaxAbsoluteMs")).toBe(true);
   });
+
+  it("fails when raw distinct top3 ratio drops below minimum", () => {
+    const budget: EvalBudget = {
+      name: "default",
+      failOnMissingBaseline: true,
+      thresholds: {
+        minRawDistinctTop3Ratio: 0.9,
+      },
+    };
+
+    const gate = evaluateBudgetGate(
+      budget,
+      {
+        ...summary(5),
+        metrics: {
+          ...summary(5).metrics,
+          rawDistinctTop3Ratio: 0.5,
+        },
+      }
+    );
+    expect(gate.passed).toBe(false);
+    expect(gate.violations.some((v) => v.metric === "minRawDistinctTop3Ratio")).toBe(true);
+  });
+
+  it("fails when raw distinct top3 ratio regresses beyond allowed drop", () => {
+    const budget: EvalBudget = {
+      name: "default",
+      failOnMissingBaseline: true,
+      thresholds: {
+        rawDistinctTop3RatioMaxDrop: 0.1,
+      },
+    };
+
+    const comparison: EvalComparison = {
+      ...comparisonWithBaselineP95(5),
+      deltas: {
+        ...comparisonWithBaselineP95(5).deltas,
+        rawDistinctTop3Ratio: {
+          current: 0.6,
+          baseline: 0.8,
+          absolute: -0.2,
+          relativePct: -25,
+        },
+      },
+    };
+
+    const gate = evaluateBudgetGate(
+      budget,
+      {
+        ...summary(5),
+        metrics: {
+          ...summary(5).metrics,
+          rawDistinctTop3Ratio: 0.6,
+        },
+      },
+      comparison
+    );
+    expect(gate.passed).toBe(false);
+    expect(gate.violations.some((v) => v.metric === "rawDistinctTop3RatioMaxDrop")).toBe(true);
+  });
 });
diff --git a/tests/eval-schema.test.ts b/tests/eval-schema.test.ts
index 399b2ea..8c149da 100644
--- a/tests/eval-schema.test.ts
+++ b/tests/eval-schema.test.ts
@@ -82,13 +82,17 @@ describe("eval schema", () => {
         thresholds: {
           hitAt5MaxDrop: 0.05,
           mrrAt10MaxDrop: 0.02,
+          rawDistinctTop3RatioMaxDrop: 0.1,
           p95LatencyMaxMultiplier: 1.5,
+          minRawDistinctTop3Ratio: 0.7,
         },
       },
       "budget.json"
     );
 
     expect(budget.thresholds.hitAt5MaxDrop).toBe(0.05);
+    expect(budget.thresholds.rawDistinctTop3RatioMaxDrop).toBe(0.1);
+    expect(budget.thresholds.minRawDistinctTop3Ratio).toBe(0.7);
     expect(budget.failOnMissingBaseline).toBe(true);
   });
 
@@ -98,7 +102,7 @@ describe("eval schema", () => {
         {
           name: "default",
           thresholds: {
-            hitAt5MaxDrop: "bad",
+            rawDistinctTop3RatioMaxDrop: "bad",
           },
         },
         "budget.json"

From 3694b4c18f85528f6cad6065a26b0f6cd05a0cc1 Mon Sep 17 00:00:00 2001
From: Helweg <kenneth@helweghansen.dk>
Date: Sat, 11 Apr 2026 22:01:38 +0200
Subject: [PATCH 16/20] test: configure raw diversity eval budgets

---
 benchmarks/baselines/eval-baseline-summary.json | 2 ++
 benchmarks/budgets/default.json                 | 4 +++-
 benchmarks/budgets/github-models.json           | 1 +
 3 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/benchmarks/baselines/eval-baseline-summary.json b/benchmarks/baselines/eval-baseline-summary.json
index 7efdc86..68d3371 100644
--- a/benchmarks/baselines/eval-baseline-summary.json
+++ b/benchmarks/baselines/eval-baseline-summary.json
@@ -19,6 +19,8 @@
     "hitAt10": 1,
     "mrrAt10": 0.875,
     "ndcgAt10": 0.9127302324517832,
+    "distinctTop3Ratio": 1,
+    "rawDistinctTop3Ratio": 1,
     "latencyMs": {
       "p50": 26.173166000000037,
       "p95": 52.931082999999944,
diff --git a/benchmarks/budgets/default.json b/benchmarks/budgets/default.json
index 67c1ca3..9f62fde 100644
--- a/benchmarks/budgets/default.json
+++ b/benchmarks/budgets/default.json
@@ -5,9 +5,11 @@
   "thresholds": {
     "hitAt5MaxDrop": 0.03,
     "mrrAt10MaxDrop": 0.03,
+    "rawDistinctTop3RatioMaxDrop": 0.1,
     "p95LatencyMaxMultiplier": 1.35,
     "p95LatencyMaxAbsoluteMs": 4000,
     "minHitAt5": 0.4,
-    "minMrrAt10": 0.25
+    "minMrrAt10": 0.25,
+    "minRawDistinctTop3Ratio": 0.5
   }
 }
diff --git a/benchmarks/budgets/github-models.json b/benchmarks/budgets/github-models.json
index ca73498..cee0cac 100644
--- a/benchmarks/budgets/github-models.json
+++ b/benchmarks/budgets/github-models.json
@@ -4,6 +4,7 @@
   "thresholds": {
     "minHitAt5": 0.5,
     "minMrrAt10": 0.45,
+    "minRawDistinctTop3Ratio": 0.5,
     "p95LatencyMaxAbsoluteMs": 500
   }
 }

From 01d4082d168977f1960061a47c3add18e7fc01d5 Mon Sep 17 00:00:00 2001
From: Helweg <kenneth@helweghansen.dk>
Date: Sat, 11 Apr 2026 22:57:02 +0200
Subject: [PATCH 17/20] fix: keep reranker config optional after rebase

---
 src/config/schema.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/config/schema.ts b/src/config/schema.ts
index be005ff..26c938c 100644
--- a/src/config/schema.ts
+++ b/src/config/schema.ts
@@ -127,7 +127,7 @@ export type ParsedCodebaseIndexConfig = CodebaseIndexConfig & {
   indexing: IndexingConfig;
   search: SearchConfig;
   debug: DebugConfig;
-  reranker: RerankerConfig;
+  reranker?: RerankerConfig;
   knowledgeBases: string[];
   additionalInclude: string[];
 };

From 2372debdaf9e7f960f326c663679955e8f2bc7c2 Mon Sep 17 00:00:00 2001
From: Helweg <kenneth@helweghansen.dk>
Date: Sat, 11 Apr 2026 22:57:11 +0200
Subject: [PATCH 18/20] fix: preserve doc intent after rerank rebase merge

---
 src/indexer/index.ts             | 77 ++++----------------------------
 tests/retrieval-ranking.test.ts  | 59 ++++++++++++++++++++++++
 tests/search-integration.test.ts |  4 +-
 3 files changed, 70 insertions(+), 70 deletions(-)

diff --git a/src/indexer/index.ts b/src/indexer/index.ts
index 29ea48f..0a806c9 100644
--- a/src/indexer/index.ts
+++ b/src/indexer/index.ts
@@ -1590,14 +1590,18 @@ export class Indexer {
       return candidates;
     }
 
-    if (options?.definitionIntent === true || options?.hasIdentifierHints === true) {
-      return candidates;
-    }
-
     const queryTokens = Array.from(tokenizeTextForRanking(query));
     const preferSourcePaths = classifyQueryIntentRaw(query) === "source";
     const docIntent = classifyDocIntent(queryTokens) === "docs";
 
+    if (options?.definitionIntent === true) {
+      return candidates;
+    }
+
+    if (options?.hasIdentifierHints === true && preferSourcePaths && !docIntent) {
+      return candidates;
+    }
+
     const topN = Math.min(reranker.topN, candidates.length);
     const head = candidates.slice(0, topN);
     const tail = candidates.slice(topN);
@@ -2867,70 +2871,7 @@ export class Indexer {
       : baseFiltered
     ).slice(0, maxResults);
 
-    // Apply reranking if enabled and available
-    let finalResults = filtered;
-    if (this.reranker?.isAvailable() && filtered.length > 1) {
-      const rerankStartTime = performance.now();
-
-      // Read content for reranking
-      const documentsForRerank = await Promise.all(
-        filtered.map(async (r) => {
-          try {
-            const fileContent = await fsPromises.readFile(r.metadata.filePath, "utf-8");
-            const lines = fileContent.split("\n");
-            return lines.slice(r.metadata.startLine - 1, r.metadata.endLine).join("\n");
-          } catch {
-            return r.metadata.name ?? r.metadata.chunkType;
-          }
-        })
-      );
-
-      try {
-        const rerankResponse = await this.reranker.rerank(
-          query,
-          documentsForRerank,
-          this.config.reranker?.topN ?? filtered.length
-        );
-
-        if (rerankResponse.results.length > 0) {
-          // Create a map of original index to rerank score
-          const rerankScores = new Map<number, number>();
-          for (const result of rerankResponse.results) {
-            rerankScores.set(result.index, result.relevanceScore);
-          }
-
-          // Reorder results based on rerank scores
-          const rerankedIndices = rerankResponse.results
-            .sort((a, b) => b.relevanceScore - a.relevanceScore)
-            .map(r => r.index);
-
-          // Build final results: reranked first, then remaining
-          const rerankedSet = new Set(rerankedIndices);
-          const reranked = rerankedIndices
-            .filter(idx => idx < filtered.length)
-            .map(idx => ({
-              ...filtered[idx],
-              score: rerankScores.get(idx) ?? filtered[idx].score,
-            }));
-          const remaining = filtered
-            .filter((_, idx) => !rerankedSet.has(idx));
-
-          finalResults = [...reranked, ...remaining].slice(0, maxResults);
-        }
-
-        const rerankMs = performance.now() - rerankStartTime;
-        this.logger.search("debug", "Reranking complete", {
-          documentsReranked: documentsForRerank.length,
-          rerankMs: Math.round(rerankMs * 100) / 100,
-          tokensUsed: rerankResponse.tokensUsed,
-        });
-      } catch (error) {
-        // Reranking failed, use original results
-        this.logger.search("warn", "Reranking failed, using original results", {
-          error: error instanceof Error ? error.message : String(error),
-        });
-      }
-    }
+    const finalResults = filtered;
 
     const totalSearchMs = performance.now() - searchStartTime;
     this.logger.recordSearch(totalSearchMs, {
diff --git a/tests/retrieval-ranking.test.ts b/tests/retrieval-ranking.test.ts
index 82ab623..64d45f6 100644
--- a/tests/retrieval-ranking.test.ts
+++ b/tests/retrieval-ranking.test.ts
@@ -547,6 +547,70 @@ describe("retrieval ranking", () => {
     globalThis.fetch = fetchSpy;
   });
 
+  it("allows external reranker for documentation intent even when identifier hints are present", async () => {
+    const config = parseConfig({
+      embeddingProvider: "custom",
+      customProvider: {
+        baseUrl: "http://localhost:11434/v1",
+        model: "mock-embed",
+        dimensions: 8,
+      },
+      reranker: {
+        enabled: true,
+        provider: "custom",
+        model: "mock-reranker",
+        baseUrl: "https://rerank.example/v1",
+        topN: 3,
+      },
+    });
+    const indexer = new Indexer("/repo", config);
+
+    const fetchSpy = globalThis.fetch;
+    let rerankCalled = false;
+    let rerankDocuments: string[] | undefined;
+    // Stub fetch: a /rerank request records the submitted documents and ranks index 1 above index 0; any other request gets a dummy embedding.
+    globalThis.fetch = (async (input, init) => {
+      if (String(input).includes("/rerank")) {
+        rerankCalled = true;
+        rerankDocuments = (JSON.parse(String(init?.body ?? "{}")) as { documents?: string[] }).documents;
+        return new Response(JSON.stringify({
+          results: [
+            { index: 1, relevance_score: 0.99 },
+            { index: 0, relevance_score: 0.6 },
+          ],
+        }), { status: 200 });
+      }
+      return new Response(JSON.stringify({ data: [{ embedding: Array.from({ length: 8 }, () => 0.1) }], usage: { total_tokens: 1 } }), { status: 200 });
+    }) as typeof fetch;
+
+    // Restore the real fetch in `finally` so a failed assertion or a rejected call cannot leak the stub into later tests.
+    try {
+      const candidates: Candidate[] = [
+        { id: "impl", score: 0.9, metadata: meta({ filePath: "/repo/src/indexer/index.ts", name: "rankHybridResults", chunkType: "function", startLine: 1, endLine: 3 }) },
+        { id: "docs-readme", score: 0.89, metadata: meta({ filePath: "/repo/README.md", name: "retrieval documentation", chunkType: "other", startLine: 1, endLine: 3 }) },
+        { id: "docs-guide", score: 0.88, metadata: meta({ filePath: "/repo/docs/guide.md", name: "rankHybridResults guide", chunkType: "other", startLine: 1, endLine: 3 }) },
+      ];
+
+      const reranked = await (indexer as unknown as {
+        rerankCandidatesWithApi(
+          query: string,
+          items: Candidate[],
+          options?: { definitionIntent?: boolean; hasIdentifierHints?: boolean }
+        ): Promise<Candidate[]>;
+      }).rerankCandidatesWithApi("rankHybridResults documentation guide", candidates, {
+        hasIdentifierHints: true,
+      });
+
+      expect(rerankCalled).toBe(true);
+      expect(reranked.map((candidate) => candidate.id)).toEqual(["docs-guide", "docs-readme", "impl"]);
+      expect(rerankDocuments?.length).toBe(2);
+      expect(rerankDocuments?.[0]).toContain("path: /repo/README.md");
+      expect(rerankDocuments?.[1]).toContain("path: /repo/docs/guide.md");
+    } finally {
+      globalThis.fetch = fetchSpy;
+    }
+  });
+
   it("diversifies external reranker output for exploratory queries", async () => {
     const config = parseConfig({
       embeddingProvider: "custom",
diff --git a/tests/search-integration.test.ts b/tests/search-integration.test.ts
index b2bbc72..0272d23 100644
--- a/tests/search-integration.test.ts
+++ b/tests/search-integration.test.ts
@@ -133,7 +133,7 @@ export function rerankResults(query: string) { return rankHybridResults(query);
     const indexer = new Indexer(tempDir, config);
     await indexer.index();
 
-    const results = await indexer.search("where is rankHybridResults documentation", 5, {
+    const results = await indexer.search("rankHybridResults documentation guide", 5, {
       metadataOnly: true,
       filterByBranch: false,
     });
@@ -338,7 +338,7 @@ export function rerankResults(query: string) { return rankHybridResults(query);
     const indexer = new Indexer(tempDir, config);
     await indexer.index();
 
-    const results = await indexer.search("where is rankHybridResults documentation", 5, {
+    const results = await indexer.search("rankHybridResults documentation guide", 5, {
       metadataOnly: true,
       filterByBranch: false,
     });

From 454766fa28bcc6c4347ca22a671fa9bb25907f0e Mon Sep 17 00:00:00 2001
From: Helweg <kenneth@helweghansen.dk>
Date: Sun, 12 Apr 2026 18:18:45 +0200
Subject: [PATCH 19/20] fix: reject legacy eval baselines missing diversity
 metrics

---
 src/eval/reports.ts       | 27 +++++++++++-
 tests/eval-runner.test.ts | 88 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 114 insertions(+), 1 deletion(-)

diff --git a/src/eval/reports.ts b/src/eval/reports.ts
index f55c21c..e9b2136 100644
--- a/src/eval/reports.ts
+++ b/src/eval/reports.ts
@@ -9,6 +9,31 @@ import type {
   SweepAggregateReport,
 } from "./types.js";
 
+function assertFiniteNumber(value: unknown, path: string): number {
+  if (typeof value !== "number" || Number.isNaN(value) || !Number.isFinite(value)) {
+    throw new Error(`${path} must be a finite number`);
+  }
+  return value;
+}
+
+function validateSummary(summary: EvalSummary, summaryPath: string): EvalSummary {
+  assertFiniteNumber(summary.metrics.hitAt1, `${summaryPath}.metrics.hitAt1`);
+  assertFiniteNumber(summary.metrics.hitAt3, `${summaryPath}.metrics.hitAt3`);
+  assertFiniteNumber(summary.metrics.hitAt5, `${summaryPath}.metrics.hitAt5`);
+  assertFiniteNumber(summary.metrics.hitAt10, `${summaryPath}.metrics.hitAt10`);
+  assertFiniteNumber(summary.metrics.mrrAt10, `${summaryPath}.metrics.mrrAt10`);
+  assertFiniteNumber(summary.metrics.ndcgAt10, `${summaryPath}.metrics.ndcgAt10`);
+  assertFiniteNumber(summary.metrics.distinctTop3Ratio, `${summaryPath}.metrics.distinctTop3Ratio`);
+  assertFiniteNumber(summary.metrics.rawDistinctTop3Ratio, `${summaryPath}.metrics.rawDistinctTop3Ratio`);
+  assertFiniteNumber(summary.metrics.latencyMs.p50, `${summaryPath}.metrics.latencyMs.p50`);
+  assertFiniteNumber(summary.metrics.latencyMs.p95, `${summaryPath}.metrics.latencyMs.p95`);
+  assertFiniteNumber(summary.metrics.latencyMs.p99, `${summaryPath}.metrics.latencyMs.p99`);
+  assertFiniteNumber(summary.metrics.embedding.callCount, `${summaryPath}.metrics.embedding.callCount`);
+  assertFiniteNumber(summary.metrics.embedding.estimatedCostUsd, `${summaryPath}.metrics.embedding.estimatedCostUsd`);
+
+  return summary;
+}
+
 function formatPct(value: number): string {
   return `${(value * 100).toFixed(2)}%`;
 }
@@ -28,7 +53,7 @@ function signed(value: number, digits = 4): string {
 
 export function loadSummary(summaryPath: string): EvalSummary {
   const raw = readFileSync(summaryPath, "utf-8");
-  return JSON.parse(raw) as EvalSummary;
+  return validateSummary(JSON.parse(raw) as EvalSummary, summaryPath);
 }
 
 export function createRunDirectory(outputRoot: string, timestampOverride?: string): string {
diff --git a/tests/eval-runner.test.ts b/tests/eval-runner.test.ts
index 558201b..7aac67d 100644
--- a/tests/eval-runner.test.ts
+++ b/tests/eval-runner.test.ts
@@ -161,6 +161,94 @@ describe("eval runner", () => {
     expect(readFileSync(path.join(compareRun.outputDir, "compare.json"), "utf-8")).toContain("\"deltas\"");
   });
 
+  it("fails fast when baseline summary is missing required diversity metrics", async () => {
+    const baselineRun = await runEvaluation({
+      projectRoot: tempDir,
+      datasetPath: "benchmarks/golden/small.json",
+      outputRoot: "benchmarks/results",
+      ciMode: false,
+      reindex: false,
+    });
+
+    const legacyBaseline = { // copy with its own metrics object, so the deletes below leave baselineRun.summary untouched
+      ...baselineRun.summary,
+      metrics: {
+        ...baselineRun.summary.metrics,
+      },
+    } as Record<string, unknown>;
+
+    delete (legacyBaseline.metrics as Record<string, unknown>).distinctTop3Ratio; // strip both diversity fields to mimic a legacy summary
+    delete (legacyBaseline.metrics as Record<string, unknown>).rawDistinctTop3Ratio;
+
+    const baselinePath = path.join(tempDir, "benchmarks", "baselines", "legacy-baseline-summary.json");
+    writeFileSync(baselinePath, JSON.stringify(legacyBaseline, null, 2), "utf-8");
+
+    await expect( // comparing against the stripped baseline must reject rather than silently compare
+      runEvaluation({
+        projectRoot: tempDir,
+        datasetPath: "benchmarks/golden/small.json",
+        outputRoot: "benchmarks/results",
+        againstPath: "benchmarks/baselines/legacy-baseline-summary.json",
+        ciMode: false,
+        reindex: false,
+      })
+    ).rejects.toThrow(/metrics\.distinctTop3Ratio must be a finite number/);
+  });
+
+  it("fails ci mode when budget baseline summary is missing required diversity metrics", async () => {
+    const baselineRun = await runEvaluation({
+      projectRoot: tempDir,
+      datasetPath: "benchmarks/golden/small.json",
+      outputRoot: "benchmarks/results",
+      ciMode: false,
+      reindex: false,
+    });
+
+    const legacyBaseline = { // copy with its own metrics object, so the deletes below leave baselineRun.summary untouched
+      ...baselineRun.summary,
+      metrics: {
+        ...baselineRun.summary.metrics,
+      },
+    } as Record<string, unknown>;
+
+    delete (legacyBaseline.metrics as Record<string, unknown>).distinctTop3Ratio; // strip both diversity fields to mimic a legacy summary
+    delete (legacyBaseline.metrics as Record<string, unknown>).rawDistinctTop3Ratio;
+
+    writeFileSync(
+      path.join(tempDir, "benchmarks", "baselines", "legacy-baseline-summary.json"),
+      JSON.stringify(legacyBaseline, null, 2),
+      "utf-8"
+    );
+
+    writeFileSync( // budget file whose baselinePath points at the stripped baseline written above
+      path.join(tempDir, "benchmarks", "budgets", "legacy-check.json"),
+      JSON.stringify(
+        {
+          name: "legacy-check",
+          baselinePath: "benchmarks/baselines/legacy-baseline-summary.json",
+          failOnMissingBaseline: true,
+          thresholds: {
+            rawDistinctTop3RatioMaxDrop: 0.1,
+          },
+        },
+        null,
+        2
+      ),
+      "utf-8"
+    );
+
+    await expect(
+      runEvaluation({
+        projectRoot: tempDir,
+        datasetPath: "benchmarks/golden/small.json",
+        outputRoot: "benchmarks/results",
+        ciMode: true,
+        budgetPath: "benchmarks/budgets/legacy-check.json", // no againstPath: the baseline is expected to be reached through the budget's baselinePath
+        reindex: false,
+      })
+    ).rejects.toThrow(/metrics\.distinctTop3Ratio must be a finite number/);
+  });
+
   it("fails ci gate when thresholds regress beyond tolerance", async () => {
     const baselineRun = await runEvaluation({
       projectRoot: tempDir,

From 926c828208fc1f904078bbb1d773cc48f1283815 Mon Sep 17 00:00:00 2001
From: Helweg <kenneth@helweghansen.dk>
Date: Sun, 12 Apr 2026 18:58:50 +0200
Subject: [PATCH 20/20] fix: allow eval diff to read legacy summaries

---
 src/eval/cli.ts        |  8 +++++--
 src/eval/reports.ts    | 31 +++++++++++++++++++++-----
 tests/eval-cli.test.ts | 50 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 82 insertions(+), 7 deletions(-)

diff --git a/src/eval/cli.ts b/src/eval/cli.ts
index 62b1bcd..4b77efb 100644
--- a/src/eval/cli.ts
+++ b/src/eval/cli.ts
@@ -336,8 +336,12 @@ export async function handleEvalCommand(args: string[], cwd: string): Promise<nu
     if (!parsed.againstPath.endsWith(".json")) {
       throw new Error("eval diff --against must point to a summary JSON file");
     }
-    const currentSummary = loadSummary(path.resolve(parsed.projectRoot, currentPath));
-    const baselineSummary = loadSummary(path.resolve(parsed.projectRoot, parsed.againstPath));
+    const currentSummary = loadSummary(path.resolve(parsed.projectRoot, currentPath), {
+      allowLegacyDiversityMetrics: true,
+    });
+    const baselineSummary = loadSummary(path.resolve(parsed.projectRoot, parsed.againstPath), {
+      allowLegacyDiversityMetrics: true,
+    });
     const comparison = compareSummaries(
       currentSummary,
       baselineSummary,
diff --git a/src/eval/reports.ts b/src/eval/reports.ts
index e9b2136..42bf252 100644
--- a/src/eval/reports.ts
+++ b/src/eval/reports.ts
@@ -9,6 +9,10 @@ import type {
   SweepAggregateReport,
 } from "./types.js";
 
+interface LoadSummaryOptions {
+  allowLegacyDiversityMetrics?: boolean; // when true, an undefined distinctTop3Ratio / rawDistinctTop3Ratio is set to 0 before the finite-number checks instead of failing them
+}
+
 function assertFiniteNumber(value: unknown, path: string): number {
   if (typeof value !== "number" || Number.isNaN(value) || !Number.isFinite(value)) {
     throw new Error(`${path} must be a finite number`);
@@ -16,15 +20,32 @@ function assertFiniteNumber(value: unknown, path: string): number {
   return value;
 }
 
-function validateSummary(summary: EvalSummary, summaryPath: string): EvalSummary {
+function validateSummary(
+  summary: EvalSummary,
+  summaryPath: string,
+  options?: LoadSummaryOptions
+): EvalSummary {
   assertFiniteNumber(summary.metrics.hitAt1, `${summaryPath}.metrics.hitAt1`);
   assertFiniteNumber(summary.metrics.hitAt3, `${summaryPath}.metrics.hitAt3`);
   assertFiniteNumber(summary.metrics.hitAt5, `${summaryPath}.metrics.hitAt5`);
   assertFiniteNumber(summary.metrics.hitAt10, `${summaryPath}.metrics.hitAt10`);
   assertFiniteNumber(summary.metrics.mrrAt10, `${summaryPath}.metrics.mrrAt10`);
   assertFiniteNumber(summary.metrics.ndcgAt10, `${summaryPath}.metrics.ndcgAt10`);
-  assertFiniteNumber(summary.metrics.distinctTop3Ratio, `${summaryPath}.metrics.distinctTop3Ratio`);
-  assertFiniteNumber(summary.metrics.rawDistinctTop3Ratio, `${summaryPath}.metrics.rawDistinctTop3Ratio`);
+
+  const metrics = summary.metrics as EvalSummary["metrics"] & {
+    distinctTop3Ratio?: number;
+    rawDistinctTop3Ratio?: number;
+  };
+
+  if (metrics.distinctTop3Ratio === undefined && options?.allowLegacyDiversityMetrics) {
+    metrics.distinctTop3Ratio = 0;
+  }
+  if (metrics.rawDistinctTop3Ratio === undefined && options?.allowLegacyDiversityMetrics) {
+    metrics.rawDistinctTop3Ratio = 0;
+  }
+
+  assertFiniteNumber(metrics.distinctTop3Ratio, `${summaryPath}.metrics.distinctTop3Ratio`);
+  assertFiniteNumber(metrics.rawDistinctTop3Ratio, `${summaryPath}.metrics.rawDistinctTop3Ratio`);
   assertFiniteNumber(summary.metrics.latencyMs.p50, `${summaryPath}.metrics.latencyMs.p50`);
   assertFiniteNumber(summary.metrics.latencyMs.p95, `${summaryPath}.metrics.latencyMs.p95`);
   assertFiniteNumber(summary.metrics.latencyMs.p99, `${summaryPath}.metrics.latencyMs.p99`);
@@ -51,9 +72,9 @@ function signed(value: number, digits = 4): string {
   return value > 0 ? `+${formatted}` : formatted;
 }
 
-export function loadSummary(summaryPath: string): EvalSummary {
+export function loadSummary(summaryPath: string, options?: LoadSummaryOptions): EvalSummary {
   const raw = readFileSync(summaryPath, "utf-8");
-  return validateSummary(JSON.parse(raw) as EvalSummary, summaryPath);
+  return validateSummary(JSON.parse(raw) as EvalSummary, summaryPath, options);
 }
 
 export function createRunDirectory(outputRoot: string, timestampOverride?: string): string {
diff --git a/tests/eval-cli.test.ts b/tests/eval-cli.test.ts
index ff60715..6ae5768 100644
--- a/tests/eval-cli.test.ts
+++ b/tests/eval-cli.test.ts
@@ -108,6 +108,8 @@ describe("eval cli", () => {
         hitAt10: 1,
         mrrAt10: 1,
         ndcgAt10: 1,
+        distinctTop3Ratio: 1,
+        rawDistinctTop3Ratio: 1,
         latencyMs: { p50: 1, p95: 2, p99: 3 },
         tokenEstimate: { queryTokens: 10, embeddingTokensUsed: 20 },
         embedding: { callCount: 1, estimatedCostUsd: 0, costPer1MTokensUsd: 0 },
@@ -130,4 +132,52 @@ describe("eval cli", () => {
 
     expect(exitCode).toBe(0);
   });
+
+  it("allows eval diff to read legacy summaries missing diversity metrics", async () => {
+    const currentSummaryPath = path.join(tempDir, "current.json");
+    const baselineSummaryPath = path.join(tempDir, "baseline.json");
+
+    const legacySummary = { // summary fixture without the two diversity metrics
+      generatedAt: new Date().toISOString(),
+      projectRoot: tempDir,
+      datasetPath: "benchmarks/golden/small.json",
+      datasetName: "small",
+      datasetVersion: "1.0.0",
+      queryCount: 1,
+      topK: 10,
+      searchConfig: {
+        fusionStrategy: "rrf",
+        hybridWeight: 0.4,
+        rrfK: 60,
+        rerankTopN: 20,
+      },
+      metrics: {
+        hitAt1: 1,
+        hitAt3: 1,
+        hitAt5: 1,
+        hitAt10: 1,
+        mrrAt10: 1,
+        ndcgAt10: 1, // distinctTop3Ratio and rawDistinctTop3Ratio are deliberately absent from this object
+        latencyMs: { p50: 1, p95: 2, p99: 3 },
+        tokenEstimate: { queryTokens: 10, embeddingTokensUsed: 20 },
+        embedding: { callCount: 1, estimatedCostUsd: 0, costPer1MTokensUsd: 0 },
+        failureBuckets: {
+          "wrong-file": 0,
+          "wrong-symbol": 0,
+          "docs-tests-outranking-source": 0,
+          "no-relevant-hit-top-k": 0,
+        },
+      },
+    };
+
+    writeFileSync(currentSummaryPath, JSON.stringify(legacySummary, null, 2), "utf-8"); // the same legacy content is used for both sides of the diff
+    writeFileSync(baselineSummaryPath, JSON.stringify(legacySummary, null, 2), "utf-8");
+
+    const exitCode = await handleEvalCommand(
+      ["diff", "--current", "current.json", "--against", "baseline.json"],
+      tempDir
+    );
+
+    expect(exitCode).toBe(0);
+  });
 });