fix: add proper rate limiting for GitHub Models API (15 req/min)

Helweg · Helweg · commit e80cb3da4cbe · 2026-01-16T13:34:56.000+01:00
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "opencode-codebase-index",
-  "version": "0.2.3",
+  "version": "0.2.4",
   "description": "Semantic codebase indexing and search for OpenCode - find code by meaning, not just keywords",
   "type": "module",
   "main": "dist/index.js",
diff --git a/src/indexer/index.ts b/src/indexer/index.ts
@@ -498,11 +498,19 @@ export class Indexer {
       }
     }
 
-    const queue = new PQueue({ concurrency: 3 });
+    // GitHub Models API rate limit: 15 requests/minute for embeddings
+    // Use concurrency 1 and start at most one request per 4-second interval to stay within the limit (15 req/min = 1 req per 4 sec)
+    const queue = new PQueue({ concurrency: 1, interval: 4000, intervalCap: 1 });
     const dynamicBatches = createDynamicBatches(chunksNeedingEmbedding);
+    let rateLimitBackoffMs = 0;
 
     for (const batch of dynamicBatches) {
       queue.add(async () => {
+        // Additional backoff if we've been rate limited
+        if (rateLimitBackoffMs > 0) {
+          await new Promise(resolve => setTimeout(resolve, rateLimitBackoffMs));
+        }
+        
         try {
           const result = await pRetry(
             async () => {
@@ -511,15 +519,16 @@ export class Indexer {
             },
             {
               retries: this.config.indexing.retries,
-              minTimeout: this.config.indexing.retryDelayMs,
-              maxTimeout: 30000,
+              minTimeout: Math.max(this.config.indexing.retryDelayMs, 5000), // Minimum 5s between retries
+              maxTimeout: 60000, // Max 60s backoff
               factor: 2,
               onFailedAttempt: (error) => {
                 const message = getErrorMessage(error);
                 if (isRateLimitError(error)) {
-                  queue.concurrency = 1;
+                  // Exponential backoff: double the current delay (first hit: 10s), capped at 60s
+                  rateLimitBackoffMs = Math.min(60000, (rateLimitBackoffMs || 5000) * 2);
                   console.error(
-                    `Rate limited (attempt ${error.attemptNumber}/${error.retriesLeft + error.attemptNumber}): waiting before retry...`
+                    `Rate limited (attempt ${error.attemptNumber}/${error.retriesLeft + error.attemptNumber}): waiting ${rateLimitBackoffMs / 1000}s before retry...`
                   );
                 } else {
                   console.error(
@@ -529,6 +538,11 @@ export class Indexer {
               },
             }
           );
+          
+          // Success - gradually reduce backoff
+          if (rateLimitBackoffMs > 0) {
+            rateLimitBackoffMs = Math.max(0, rateLimitBackoffMs - 2000);
+          }
 
           const items = batch.map((chunk, idx) => ({
             id: chunk.id,

Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "opencode-codebase-index",`
`3`		`- "version": "0.2.3",`
	`3`	`+ "version": "0.2.4",`
`4`	`4`	`"description": "Semantic codebase indexing and search for OpenCode - find code by meaning, not just keywords",`
`5`	`5`	`"type": "module",`
`6`	`6`	`"main": "dist/index.js",`