diff --git a/README.md b/README.md index 55085e5..46f845c 100644 --- a/README.md +++ b/README.md @@ -132,12 +132,13 @@ src/api/checkout.ts:89 (Route handler for /pay) | Don't know the function name | `codebase_search` | Semantic search finds by meaning | | Exploring unfamiliar codebase | `codebase_search` | Discovers related code across files | | Just need to find locations | `codebase_peek` | Returns metadata only, saves ~90% tokens | +| Need the authoritative definition site | `implementation_lookup` | Prioritizes real implementation definitions over docs/tests | | Understand code flow | `call_graph` | Find callers/callees of any function | | Know exact identifier | `grep` | Faster, finds all occurrences | | Need ALL matches | `grep` | Semantic returns top N only | | Mixed discovery + precision | `/find` (hybrid) | Best of both worlds | -**Rule of thumb**: `codebase_peek` to find locations → `Read` to examine → `grep` for precision. +**Rule of thumb**: `codebase_peek` to find locations → `Read` to examine → `grep` for precision. For symbol-definition questions, use `implementation_lookup` first. ## 📊 Token Usage @@ -296,6 +297,12 @@ The plugin exposes these tools to the OpenCode agent: ``` - **Workflow**: `codebase_peek` → find locations → `Read` specific files +### `implementation_lookup` +**Definition-first lookup.** Jumps to the authoritative definition site for a symbol or natural-language definition query. +- **Use for**: "Where is X defined?", symbol-definition requests, and cases where you want the implementation site rather than all usages. +- **Behavior**: Prefers real implementation files over tests, docs, examples, and fixtures. +- **Fallback**: If nothing authoritative is found, use `codebase_search` for broader discovery. + ### `find_similar` Find code similar to a provided snippet. - **Use for**: Duplicate detection, refactor prep, pattern mining. @@ -530,7 +537,8 @@ Zero-config by default (uses `auto` mode). 
Customize in `.opencode/codebase-inde "fusionStrategy": "rrf", // rrf | weighted "rrfK": 60, // RRF smoothing constant "rerankTopN": 20, // Deterministic rerank depth - "contextLines": 0 // Extra lines before/after match + "contextLines": 0, // Extra lines before/after match + "routingHints": true // Runtime nudges for local discovery/definition queries }, "reranker": { "enabled": false, @@ -600,6 +608,7 @@ String values in `codebase-index.json` can reference environment variables with | `rrfK` | `60` | RRF smoothing constant. Higher values flatten rank impact, lower values prioritize top-ranked candidates more strongly | | `rerankTopN` | `20` | Deterministic rerank depth cap. Applies lightweight name/path/chunk-type rerank to top-N only | | `contextLines` | `0` | Extra lines to include before/after each match | +| `routingHints` | `true` | Inject lightweight runtime hints for local conceptual discovery and definition lookups. Set to `false` to disable plugin-side routing nudges. | | **reranker** | | Optional second-stage model reranker for the top candidate pool | | `enabled` | `false` | Turn external reranking on/off | | `provider` | `"custom"` | Hosted shortcuts: `cohere`, `jina`, or `custom` | @@ -621,6 +630,7 @@ String values in `codebase-index.json` can reference environment variables with ### Retrieval ranking behavior - `codebase_search` and `codebase_peek` use the hybrid path: semantic + keyword retrieval → fusion (`fusionStrategy`) → deterministic rerank (`rerankTopN`) → optional external reranker (`reranker`) → filtering. +- When `search.routingHints` is enabled (default), the plugin adds tiny per-turn runtime hints for local conceptual discovery and definition queries. Conceptual discovery is nudged toward `codebase_peek` / `codebase_search`, while definition questions are nudged toward `implementation_lookup`. Exact identifier and unrelated operational tasks are left alone. 
- `find_similar` stays semantic-only: semantic retrieval + deterministic rerank only (no keyword retrieval, no RRF). - For compatibility rollbacks, set `search.fusionStrategy` to `"weighted"` to use the legacy weighted fusion path. - When enabled, the external reranker sees path metadata plus a bounded on-disk code snippet for each candidate so it can distinguish real implementations from docs/tests more reliably. diff --git a/src/config/schema.ts b/src/config/schema.ts index 26c938c..5c5780b 100644 --- a/src/config/schema.ts +++ b/src/config/schema.ts @@ -47,6 +47,7 @@ export interface SearchConfig { rrfK: number; rerankTopN: number; contextLines: number; + routingHints: boolean; } export type RerankerProvider = "cohere" | "jina" | "custom"; @@ -161,6 +162,7 @@ function getDefaultSearchConfig(): SearchConfig { rrfK: 60, rerankTopN: 20, contextLines: 0, + routingHints: true, }; } @@ -277,6 +279,7 @@ export function parseConfig(raw: unknown): ParsedCodebaseIndexConfig { rrfK: typeof rawSearch.rrfK === "number" ? Math.max(1, Math.floor(rawSearch.rrfK)) : defaultSearch.rrfK, rerankTopN: typeof rawSearch.rerankTopN === "number" ? Math.min(200, Math.max(0, Math.floor(rawSearch.rerankTopN))) : defaultSearch.rerankTopN, contextLines: typeof rawSearch.contextLines === "number" ? Math.min(50, Math.max(0, rawSearch.contextLines)) : defaultSearch.contextLines, + routingHints: typeof rawSearch.routingHints === "boolean" ? rawSearch.routingHints : defaultSearch.routingHints, }; const rawDebug = (input.debug && typeof input.debug === "object" ? 
input.debug : {}) as Record; diff --git a/src/index.ts b/src/index.ts index dfaa13f..d04c4aa 100644 --- a/src/index.ts +++ b/src/index.ts @@ -23,6 +23,7 @@ import { initializeTools, } from "./tools/index.js"; import { loadCommandsFromDirectory } from "./commands/loader.js"; +import { RoutingHintController } from "./routing-hints.js"; import { hasProjectMarker } from "./utils/files.js"; import type { CombinedWatcher } from "./watcher/index.js"; @@ -52,6 +53,9 @@ const plugin: Plugin = async ({ directory }) => { initializeTools(projectRoot, config); const indexer = getSharedIndexer(); + const routingHints = config.search.routingHints + ? new RoutingHintController(() => indexer.getStatus()) + : null; const isValidProject = !config.indexing.requireProjectMarker || hasProjectMarker(projectRoot); @@ -91,6 +95,19 @@ const plugin: Plugin = async ({ directory }) => { remove_knowledge_base, }, + async "chat.message"(input, output) { + routingHints?.observeUserMessage(input.sessionID, output.parts); + }, + + async "experimental.chat.system.transform"(input, output) { + const hints = await routingHints?.getSystemHints(input.sessionID) ?? []; + output.system.push(...hints); + }, + + async "tool.execute.after"(input) { + routingHints?.markToolUsed(input.sessionID, input.tool); + }, + async config(cfg) { cfg.command = cfg.command ?? 
{};
diff --git a/src/routing-hints.ts b/src/routing-hints.ts
new file mode 100644
index 0000000..f532a88
--- /dev/null
+++ b/src/routing-hints.ts
@@ -0,0 +1,452 @@
+import type { StatusResult } from "./indexer/index.js";
+
+/** Coarse classification of what a user prompt is asking for. */
+export type RoutingIntent =
+  | "local_conceptual"
+  | "definition_lookup"
+  | "exact_identifier"
+  | "external"
+  | "direct_path"
+  | "other";
+
+/** Outcome of classifying a single user prompt. */
+export interface RoutingAssessment {
+  intent: RoutingIntent;
+  /** Whitespace-normalized prompt text that was classified. */
+  text: string;
+  /** Tag naming the rule that produced `intent`. */
+  reason: string;
+}
+
+/** Routing state remembered for one chat session. */
+export interface RoutingSessionState {
+  assessment: RoutingAssessment;
+  /** True while the latest prompt should still receive a routing hint. */
+  pendingHint: boolean;
+  /** `Date.now()` at the last write; the smallest value is evicted first. */
+  updatedAt: number;
+}
+
+interface TextPartLike {
+  type?: string;
+  text?: string;
+}
+
+// Hint lists hold lowercase phrases and are matched against lowercased prompts.
+const EXTERNAL_HINTS = [
+  "docs",
+  "documentation",
+  "official docs",
+  "github example",
+  "github examples",
+  "github repo",
+  "github repository",
+  "web search",
+  "website",
+  "url",
+  "npm",
+  "pypi",
+  "crate",
+  "library",
+  "package",
+  "framework",
+  "context7",
+  "stackoverflow",
+];
+
+const NON_DISCOVERY_HINTS = [
+  "commit",
+  "rebase",
+  "push",
+  "pull request",
+  "pr",
+  "lint",
+  "typecheck",
+  "build",
+  "test",
+  "release",
+  "deploy",
+  "screenshot",
+  "browser",
+  "open the website",
+];
+
+const CONCEPTUAL_DISCOVERY_HINTS = [
+  "where is",
+  "where are",
+  "which file",
+  "what file",
+  "how does",
+  "how do we",
+  "how is",
+  "find the code",
+  "find code",
+  "find where",
+  "find logic",
+  "implementation",
+  "implements",
+  "handler",
+  "flow",
+  "logic",
+  "middleware",
+  "parser",
+  "validation",
+  "rate limiting",
+  "error handling",
+  "auth flow",
+  "responsible for",
+  "similar code",
+  "pattern",
+  "code that",
+];
+
+// A prompt counts as a definition request only when it mentions one of these
+// terms. NOTE(review): earlier drafts also listed "jump to", but the classifier
+// required "defined"/"definition" as well, so it never matched on its own;
+// add it here if bare "jump to" prompts should route to implementation_lookup.
+const DEFINITION_HINTS = [
+  "defined",
+  "definition",
+];
+
+const EXACT_MATCH_HINTS = [
+  "exact",
+  "all references",
+  "all occurrences",
+  "literal",
+  "regex",
+  "grep",
+  "identifier",
+  "symbol",
+  "named",
+  "definition of",
+];
+
+const FILE_PATH_PATTERN = /(?:^|\s)(?:\.?\.?\/)?[\w.-]+(?:\/[\w.-]+)+/;
+const URL_PATTERN = /https?:\/\//;
+// Tokenizes word-like runs; hasIdentifierShape decides which runs are identifiers.
+const CAMEL_OR_PASCAL_PATTERN = /\b[A-Za-z_$][A-Za-z0-9_$]*\b/g;
+const SNAKE_PATTERN = /\b[a-z0-9]+_[a-z0-9_]+\b/g;
+const KEBAB_PATTERN = /\b[a-z0-9]+-[a-z0-9-]+\b/g;
+const BACKTICK_IDENTIFIER_PATTERN = /`([^`]+)`/g;
+const BACKTICK_IDENTIFIER_PRESENCE_PATTERN = /`([^`]+)`/;
+// A capital that directly follows a lowercase letter or digit, as in `validateToken`.
+const INNER_CAPITAL_PATTERN = /[a-z0-9][A-Z]/;
+// Single-quoted span whose quotes are not glued to a word, so "don't" is not a quote.
+const SINGLE_QUOTED_PATTERN = /(?<![A-Za-z0-9])'[^']+'(?![A-Za-z0-9])/;
+// Hints this short ("pr", "url", "npm") must match a whole word.
+const SHORT_HINT_MAX_LENGTH = 3;
+
+/** Trims `text` and collapses every whitespace run into a single space. */
+function normalizeText(text: string): string {
+  return text.trim().replace(/\s+/g, " ");
+}
+
+/** Escapes regex metacharacters so `value` is matched literally. */
+function escapeRegExp(value: string): string {
+  return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+}
+
+/**
+ * Reports whether `text` contains any of `hints` starting on a word boundary.
+ *
+ * Plain substring matching misfires on fragments ("url" inside "curl", "pr"
+ * inside "express", "defined" inside "undefined"). Longer hints may still be
+ * followed by a suffix, so "test" keeps matching "tests".
+ *
+ * @param text Lowercased prompt text.
+ * @param hints Lowercase phrases to look for.
+ */
+function includesHint(text: string, hints: string[]): boolean {
+  return hints.some((hint) => {
+    const trailingBoundary = hint.length <= SHORT_HINT_MAX_LENGTH ? "(?![a-z0-9_])" : "";
+    const pattern = new RegExp(`(?<![a-z0-9_])${escapeRegExp(hint)}${trailingBoundary}`);
+
+    return pattern.test(text);
+  });
+}
+
+/** Counts whitespace-separated words; empty text has none. */
+function countWords(text: string): number {
+  if (!text) {
+    return 0;
+  }
+
+  return text.split(/\s+/).filter(Boolean).length;
+}
+
+/**
+ * Reports whether `text` contains a token that looks like a code identifier:
+ * a backticked span of at least three characters, or a word of that length
+ * with an inner capital (camelCase/PascalCase), an underscore or a hyphen.
+ *
+ * A leading capital alone does not count; otherwise the first word of any
+ * sentence ("Where", "Run") would make the prompt look like a symbol lookup.
+ */
+function hasIdentifierShape(text: string): boolean {
+  const backticked = Array.from(text.matchAll(BACKTICK_IDENTIFIER_PATTERN), (match) => match[1] ?? "");
+  if (backticked.some((span) => span.length >= 3)) {
+    return true;
+  }
+
+  const words = [
+    ...(text.match(CAMEL_OR_PASCAL_PATTERN) ?? []),
+    ...(text.match(SNAKE_PATTERN) ?? []),
+    ...(text.match(KEBAB_PATTERN) ?? []),
+  ];
+
+  return words.some((word) => {
+    if (word.length < 3) {
+      return false;
+    }
+
+    return INNER_CAPITAL_PATTERN.test(word) || word.includes("_") || word.includes("-");
+  });
+}
+
+/** Reports whether `text` contains a backticked, double-quoted or single-quoted span. */
+function containsQuotedIdentifier(text: string): boolean {
+  return BACKTICK_IDENTIFIER_PRESENCE_PATTERN.test(text) || /"[^"]+"/.test(text) || SINGLE_QUOTED_PATTERN.test(text);
+}
+
+/** Reports whether `text` names a slash-separated path or a file with a listed extension. */
+function looksLikeDirectPath(text: string): boolean {
+  return FILE_PATH_PATTERN.test(text) || /\b[a-z0-9_-]+\.(ts|tsx|js|jsx|rs|py|go|java|json|md|yaml|yml)\b/i.test(text);
+}
+
+/**
+ * Joins the text of every `type: "text"` part with single spaces and
+ * normalizes whitespace; parts of any other type are ignored.
+ */
+export function extractUserText(parts: TextPartLike[]): string {
+  return normalizeText(
+    parts
+      .filter((part) => part.type === "text" && typeof part.text === "string")
+      .map((part) => part.text ?? "")
+      .join(" "),
+  );
+}
+
+/**
+ * Classifies a user prompt so the plugin can decide whether to nudge the agent
+ * toward a codebase tool. Rules run in a fixed order and the first match wins:
+ * empty text, external lookup, non-discovery task, direct path, definition
+ * lookup, exact identifier, conceptual discovery, then "other".
+ *
+ * @param text Raw prompt text; it is whitespace-normalized before matching.
+ * @returns The intent, the normalized text and the name of the matching rule.
+ */
+export function assessRoutingIntent(text: string): RoutingAssessment {
+  const normalizedText = normalizeText(text);
+  const lowered = normalizedText.toLowerCase();
+
+  if (!lowered) {
+    return {
+      intent: "other",
+      text: normalizedText,
+      reason: "empty_text",
+    };
+  }
+
+  if (URL_PATTERN.test(lowered) || includesHint(lowered, EXTERNAL_HINTS)) {
+    return {
+      intent: "external",
+      text: normalizedText,
+      reason: "external_lookup",
+    };
+  }
+
+  const hasConceptualHint = includesHint(lowered, CONCEPTUAL_DISCOVERY_HINTS);
+  const hasDefinitionHint = includesHint(lowered, DEFINITION_HINTS);
+  const hasExactMatchHint = includesHint(lowered, EXACT_MATCH_HINTS);
+  const hasNonDiscoveryHint = includesHint(lowered, NON_DISCOVERY_HINTS);
+  const hasIdentifier = hasIdentifierShape(normalizedText);
+  const hasQuotedIdentifier = containsQuotedIdentifier(normalizedText);
+  const shortQuery = countWords(lowered) <= 10;
+
+  if (hasNonDiscoveryHint && !hasConceptualHint) {
+    return {
+      intent: "other",
+      text: normalizedText,
+      reason: "non_discovery_task",
+    };
+  }
+
+  if (looksLikeDirectPath(normalizedText)) {
+    return {
+      intent: "direct_path",
+      text: normalizedText,
+      reason: "direct_path_reference",
+    };
+  }
+
+  // Checked before the conceptual rule so "where is X defined" asks for a definition.
+  if (hasDefinitionHint) {
+    return {
+      intent: "definition_lookup",
+      text: normalizedText,
+      reason: "definition_lookup_request",
+    };
+  }
+
+  if ((hasExactMatchHint || hasQuotedIdentifier || hasIdentifier) && !hasConceptualHint && shortQuery) {
+    return {
+      intent: "exact_identifier",
+      text: normalizedText,
+      reason: hasExactMatchHint || hasQuotedIdentifier ? "exact_match_request" : "identifier_shaped_query",
+    };
+  }
+
+  if (hasConceptualHint) {
+    return {
+      intent: "local_conceptual",
+      text: normalizedText,
+      reason: "conceptual_local_discovery",
+    };
+  }
+
+  return {
+    intent: "other",
+    text: normalizedText,
+    reason: "no_local_discovery_signal",
+  };
+}
+
+/**
+ * Produces the system-prompt hint for an assessed prompt, or `null` when the
+ * intent is neither "definition_lookup" nor "local_conceptual".
+ *
+ * @param assessment Classification of the latest user prompt.
+ * @param status Index status; `null`, an unindexed project or an incompatible
+ *   index selects the wording that tells the agent to check the index first.
+ */
+export function buildRoutingHint(
+  assessment: RoutingAssessment,
+  status: Pick<StatusResult, "indexed" | "compatibility"> | null,
+): string | null {
+  if (assessment.intent === "definition_lookup") {
+    if (!status || !status.indexed || status.compatibility?.compatible === false) {
+      return "For this turn, if you need a symbol definition, check `index_status` first and run `index_codebase` if the index is missing or incompatible. Then use `implementation_lookup` for the definition site. Use `grep` for exhaustive literal matches.";
+    }
+
+    return "For this turn, prefer `implementation_lookup` to find the authoritative definition site. Use `codebase_search` only if no definition is found, and use `grep` for exhaustive literal matches.";
+  }
+
+  if (assessment.intent !== "local_conceptual") {
+    return null;
+  }
+
+  if (!status || !status.indexed || status.compatibility?.compatible === false) {
+    return "For this turn, if local code discovery by behavior is needed, check `index_status` first and run `index_codebase` if the index is missing or incompatible. Use `grep` for exact identifiers or exhaustive matches.";
+  }
+
+  return "For this turn, prefer `codebase_peek` for local code discovery by behavior or likely location, then use `codebase_search` when you need implementation content. Use `grep` for exact identifiers or exhaustive matches.";
+}
+
+// Using any of these tools clears a session's pending hint.
+const HINT_SATISFYING_TOOLS = new Set([
+  "codebase_peek",
+  "codebase_search",
+  "implementation_lookup",
+  "index_status",
+  "index_codebase",
+]);
+
+/**
+ * Tracks the latest prompt classification per chat session and hands out the
+ * matching routing hint until one of the codebase tools has been used.
+ */
+export class RoutingHintController {
+  private readonly sessionState = new Map<string, RoutingSessionState>();
+
+  /**
+   * @param getStatus Supplies the current index status; a rejection is treated as "no status".
+   * @param maxSessions Number of sessions to remember before the stalest one is evicted.
+   */
+  constructor(
+    private readonly getStatus: () => Promise<Pick<StatusResult, "indexed" | "compatibility">>,
+    private readonly maxSessions: number = 200,
+  ) {}
+
+  /** Classifies the user's message and replaces the state stored for `sessionID`. */
+  observeUserMessage(sessionID: string, parts: TextPartLike[]): RoutingAssessment {
+    const assessment = assessRoutingIntent(extractUserText(parts));
+
+    // Only a new session can grow the map, so re-observing a known one must not evict anything.
+    if (!this.sessionState.has(sessionID)) {
+      this.compactSessions();
+    }
+
+    this.sessionState.set(sessionID, {
+      assessment,
+      pendingHint: assessment.intent === "local_conceptual" || assessment.intent === "definition_lookup",
+      updatedAt: Date.now(),
+    });
+
+    return assessment;
+  }
+
+  /** Returns the hint to append to the system prompt, or an empty list when none is pending. */
+  async getSystemHints(sessionID?: string): Promise<string[]> {
+    if (!sessionID) {
+      return [];
+    }
+
+    const state = this.sessionState.get(sessionID);
+    if (!state || !state.pendingHint) {
+      return [];
+    }
+
+    const status = await this.safeGetStatus();
+    const hint = buildRoutingHint(state.assessment, status);
+
+    return hint ? [hint] : [];
+  }
+
+  /** Clears the pending hint once one of the hint-satisfying tools has run in the session. */
+  markToolUsed(sessionID: string, toolName: string): void {
+    const state = this.sessionState.get(sessionID);
+    if (!state || !state.pendingHint) {
+      return;
+    }
+
+    if (HINT_SATISFYING_TOOLS.has(toolName)) {
+      state.pendingHint = false;
+      state.updatedAt = Date.now();
+    }
+  }
+
+  /** Returns the stored state for `sessionID`, if any. */
+  getSessionState(sessionID: string): RoutingSessionState | undefined {
+    return this.sessionState.get(sessionID);
+  }
+
+  /** Reads the index status, mapping any failure to `null`. */
+  private async safeGetStatus(): Promise<Pick<StatusResult, "indexed" | "compatibility"> | null> {
+    try {
+      return await this.getStatus();
+    } catch {
+      return null;
+    }
+  }
+
+  /** Evicts the session with the smallest `updatedAt` once the map is full. */
+  private compactSessions(): void {
+    if (this.sessionState.size < this.maxSessions) {
+      return;
+    }
+
+    let oldestID: string | undefined;
+    let oldestUpdatedAt = Number.POSITIVE_INFINITY;
+    for (const [sessionID, state] of this.sessionState) {
+      if (state.updatedAt < oldestUpdatedAt) {
+        oldestID = sessionID;
+        oldestUpdatedAt = state.updatedAt;
+      }
+    }
+
+    if (oldestID !== undefined) {
+      this.sessionState.delete(oldestID);
+    }
+  }
+}
diff --git a/tests/config.test.ts b/tests/config.test.ts
index ac71f5d..4d37b93 100644
--- a/tests/config.test.ts
+++ b/tests/config.test.ts
@@ -289,6 +289,16 @@ describe("config schema", () => {
     expect(config.search.fusionStrategy).toBe("rrf");
     expect(config.search.rrfK).toBe(60);
     expect(config.search.rerankTopN).toBe(20);
+    expect(config.search.routingHints).toBe(true);
+  });
+
+  it("should parse routingHints boolean", () => {
+    expect(parseConfig({ search: { routingHints: false } }).search.routingHints).toBe(false);
+    expect(parseConfig({ search: { routingHints: true } }).search.routingHints).toBe(true);
+  });
+
+  it("should fallback routingHints to default for invalid values", () => {
+    expect(parseConfig({ search: { routingHints: "nope" } }).search.routingHints).toBe(true);
   });
 
   it("should fallback fusionStrategy to default for invalid values", () => {
diff --git a/tests/routing-hints.test.ts b/tests/routing-hints.test.ts
new file mode 100644
index 0000000..0a1d575
--- /dev/null
+++ b/tests/routing-hints.test.ts
@@ -0,0 +1,228 @@
+import { describe, expect, it } from "vitest";
+
+import {
+  RoutingHintController,
+  assessRoutingIntent,
+  buildRoutingHint,
+  extractUserText,
+} from "../src/routing-hints.js";
+
+describe("routing hints", () => {
+  describe("extractUserText", () => {
+    it("combines text parts and ignores non-text parts", () => {
+      const text = extractUserText([
+        { type: "text", text: "where is the auth flow" },
+        { type: "tool", text: "ignored" },
+        { type: "text", text: "implemented" },
+      ]);
+
+      expect(text).toBe("where is the auth flow implemented");
+    });
+  });
+
+  describe("assessRoutingIntent", () => {
+    it("detects conceptual local discovery", () => {
+      const assessment = assessRoutingIntent("Where is the auth flow implemented?");
+
+      expect(assessment.intent).toBe("local_conceptual");
+      expect(assessment.reason).toBe("conceptual_local_discovery");
+    });
+
+    it("detects exact identifier lookups", () => {
+      const assessment = assessRoutingIntent("Find all references to `validateToken`");
+
+      expect(assessment.intent).toBe("exact_identifier");
+    });
+
+    it("does not alternate exact-identifier detection for repeated backticked queries", () => {
+      const first = assessRoutingIntent("Find all references to `validateToken`");
+      const second = assessRoutingIntent("Find all references to `otherSymbol`");
+      const third = assessRoutingIntent("Find all references to `validateToken`");
+
+      expect(first.intent).toBe("exact_identifier");
+      expect(second.intent).toBe("exact_identifier");
+      expect(third.intent).toBe("exact_identifier");
+    });
+
+    it("detects definition lookups separately from conceptual discovery", () => {
+      const assessment = assessRoutingIntent("Where is the payment handler defined?");
+
+      expect(assessment.intent).toBe("definition_lookup");
+      expect(assessment.reason).toBe("definition_lookup_request");
+    });
+
+    it("detects direct path requests", () => {
+      const assessment = assessRoutingIntent("Inspect src/indexer/index.ts for ranking logic");
+
+      expect(assessment.intent).toBe("direct_path");
+    });
+
+    it("detects external lookups", () => {
+      const assessment = assessRoutingIntent("Check the official docs for Next.js app router");
+
+      expect(assessment.intent).toBe("external");
+    });
+
+    it("leaves unrelated coding tasks alone", () => {
+      const assessment = assessRoutingIntent("Run the tests and fix the failing build");
+
+      expect(assessment.intent).toBe("other");
+    });
+
+    it("matches hints on word boundaries", () => {
+      expect(assessRoutingIntent("Where is the curl retry logic?").intent).toBe("local_conceptual");
+      expect(assessRoutingIntent("Why is this value undefined?").intent).toBe("other");
+    });
+
+    it("does not treat sentence-case words as identifiers", () => {
+      const assessment = assessRoutingIntent("Explain this module");
+
+      expect(assessment.intent).toBe("other");
+      expect(assessment.reason).toBe("no_local_discovery_signal");
+    });
+  });
+
+  describe("buildRoutingHint", () => {
+    it("returns a semantic routing hint when the index is ready", () => {
+      const hint = buildRoutingHint(
+        assessRoutingIntent("Where is the webhook validation logic?"),
+        { indexed: true, compatibility: { compatible: true } },
+      );
+
+      expect(hint).toContain("prefer `codebase_peek`");
+      expect(hint).toContain("`codebase_search`");
+      expect(hint).toContain("`grep`");
+    });
+
+    it("returns an index bootstrap hint when the index is missing", () => {
+      const hint = buildRoutingHint(
+        assessRoutingIntent("Which file handles retry backoff logic?"),
+        { indexed: false, compatibility: null },
+      );
+
+      expect(hint).toContain("check `index_status` first");
+      expect(hint).toContain("run `index_codebase`");
+    });
+
+    it("returns null for non-conceptual intents", () => {
+      const hint = buildRoutingHint(
+        assessRoutingIntent("Find all references to validateToken"),
+        { indexed: true, compatibility: { compatible: true } },
+      );
+
+      expect(hint).toBeNull();
+    });
+
+    it("returns a definition-specific hint when the index is ready", () => {
+      const hint = buildRoutingHint(
+        assessRoutingIntent("Where is the payment handler defined?"),
+        { indexed: true, compatibility: { compatible: true } },
+      );
+
+      expect(hint).toContain("prefer `implementation_lookup`");
+      expect(hint).toContain("`codebase_search`");
+    });
+
+    it("returns an index bootstrap hint for definition lookups when the index is missing", () => {
+      const hint = buildRoutingHint(
+        assessRoutingIntent("Where is the payment handler defined?"),
+        { indexed: false, compatibility: null },
+      );
+
+      expect(hint).toContain("check `index_status` first");
+      expect(hint).toContain("`implementation_lookup`");
+    });
+  });
+
+  describe("RoutingHintController", () => {
+    it("stores conceptual discovery state and emits one hint", async () => {
+      const controller = new RoutingHintController(async () => ({
+        indexed: true,
+        compatibility: { compatible: true },
+      }));
+
+      controller.observeUserMessage("session-1", [{ type: "text", text: "Where is the auth flow implemented?" }]);
+
+      const state = controller.getSessionState("session-1");
+      expect(state?.assessment.intent).toBe("local_conceptual");
+      expect(state?.pendingHint).toBe(true);
+
+      const hints = await controller.getSystemHints("session-1");
+      expect(hints).toHaveLength(1);
+      expect(hints[0]).toContain("prefer `codebase_peek`");
+    });
+
+    it("stops nudging after a codebase tool is used", async () => {
+      const controller = new RoutingHintController(async () => ({
+        indexed: true,
+        compatibility: { compatible: true },
+      }));
+
+      controller.observeUserMessage("session-2", [{ type: "text", text: "Find the code that validates webhook signatures" }]);
+      controller.markToolUsed("session-2", "codebase_peek");
+
+      const state = controller.getSessionState("session-2");
+      expect(state?.pendingHint).toBe(false);
+
+      const hints = await controller.getSystemHints("session-2");
+      expect(hints).toEqual([]);
+    });
+
+    it("does not create hints for exact identifier lookups", async () => {
+      const controller = new RoutingHintController(async () => ({
+        indexed: true,
+        compatibility: { compatible: true },
+      }));
+
+      controller.observeUserMessage("session-3", [{ type: "text", text: "Find all references to `validateToken`" }]);
+
+      const hints = await controller.getSystemHints("session-3");
+      expect(hints).toEqual([]);
+    });
+
+    it("stops nudging after implementation_lookup is used for definition requests", async () => {
+      const controller = new RoutingHintController(async () => ({
+        indexed: true,
+        compatibility: { compatible: true },
+      }));
+
+      controller.observeUserMessage("session-4", [{ type: "text", text: "Where is the payment handler defined?" }]);
+      controller.markToolUsed("session-4", "implementation_lookup");
+
+      const state = controller.getSessionState("session-4");
+      expect(state?.pendingHint).toBe(false);
+
+      const hints = await controller.getSystemHints("session-4");
+      expect(hints).toEqual([]);
+    });
+
+    it("falls back safely when index status lookup fails", async () => {
+      const controller = new RoutingHintController(async () => {
+        throw new Error("status unavailable");
+      });
+
+      controller.observeUserMessage("session-5", [{ type: "text", text: "Where is the retry policy logic?" }]);
+
+      const hints = await controller.getSystemHints("session-5");
+      expect(hints).toHaveLength(1);
+      expect(hints[0]).toContain("check `index_status` first");
+    });
+
+    it("evicts the stalest session only when a new session arrives at capacity", () => {
+      const controller = new RoutingHintController(async () => ({
+        indexed: true,
+        compatibility: { compatible: true },
+      }), 2);
+
+      controller.observeUserMessage("session-a", [{ type: "text", text: "Where is the auth flow implemented?" }]);
+      controller.observeUserMessage("session-b", [{ type: "text", text: "Where is the auth flow implemented?" }]);
+      controller.observeUserMessage("session-b", [{ type: "text", text: "Where is the retry policy logic?" }]);
+      expect(controller.getSessionState("session-a")).toBeDefined();
+
+      controller.observeUserMessage("session-c", [{ type: "text", text: "Where is the retry policy logic?" }]);
+      expect(controller.getSessionState("session-a")).toBeUndefined();
+      expect(controller.getSessionState("session-b")).toBeDefined();
+      expect(controller.getSessionState("session-c")).toBeDefined();
+    });
+  });
+});