Add APIUsage tracking to extChatEndpoint

Shinrai · web-flow · commit 17426d7a08ed · 2026-05-03T11:52:51.000-07:00
fix(ext-endpoint): propagate usage from DataPart stream in ExtensionContributedChatEndpoint

The extension-contributed chat endpoint previously returned a hardcoded
zeroed-out usage object on every successful response, causing the
Copilot Chat context-window meter to never update when routing through
third-party language model providers.

Add a `usageFromDataPart()` helper that checks incoming
`LanguageModelDataPart` chunks for a native usage shape
(`prompt_tokens`, `completion_tokens`, `total_tokens` — all numbers)
without keying on any MIME type. This mirrors the shape-based check
already present in the non-provider (internal) streaming path.

In `makeChatRequest2()`:
- Declare `usage: APIUsage | undefined` before the stream loop.
- After handling known MIME types (StatefulMarker, ContextManagement),
  call `usageFromDataPart()` on every DataPart and keep the most recent match.
- Return `usage ?? <zeroed fallback>` so existing behaviour is
  preserved when no usage chunk arrives.

No new MIME branch is introduced; any provider that emits a DataPart
whose JSON payload matches the OpenAI usage shape will automatically
populate the meter.
diff --git a/src/platform/endpoint/vscode-node/extChatEndpoint.ts b/src/platform/endpoint/vscode-node/extChatEndpoint.ts
@@ -18,7 +18,7 @@ import { ContextManagementResponse } from '../../networking/common/anthropic';
 import { FinishedCallback, OpenAiFunctionTool, OptionalChatRequestParams } from '../../networking/common/fetch';
 import { Response } from '../../networking/common/fetcherService';
 import { IChatEndpoint, ICreateEndpointBodyOptions, IEndpointBody, IMakeChatRequestOptions } from '../../networking/common/networking';
-import { ChatCompletion } from '../../networking/common/openai';
+import { APIUsage, ChatCompletion } from '../../networking/common/openai';
 import { IOTelService } from '../../otel/common/otelService';
 import { retrieveCapturingTokenByCorrelation, storeCapturingTokenForCorrelation } from '../../requestLogger/node/requestLogger';
 import { ITelemetryService } from '../../telemetry/common/telemetry';
@@ -37,6 +37,37 @@ enum ChatImageMimeType {
 	BMP = 'image/bmp',
 }
 
+function usageFromDataPart(part: vscode.LanguageModelDataPart): APIUsage | undefined { // Returns usage read from the part's JSON payload, or undefined when the payload is not usage-shaped.
+	try { // Decoding/parsing is best-effort: any failure below means "this part carries no usage".
+		const parsed = JSON.parse(new TextDecoder().decode(part.data)) as {
+			prompt_tokens?: unknown; // Fields are typed `unknown` so each one must be checked before use.
+			completion_tokens?: unknown;
+			total_tokens?: unknown;
+			prompt_tokens_details?: unknown;
+		};
+
+		if ( // Detection is purely by shape: all three token counts must be numbers; the part's MIME type is never consulted.
+			typeof parsed.prompt_tokens === 'number' &&
+			typeof parsed.completion_tokens === 'number' &&
+			typeof parsed.total_tokens === 'number'
+		) {
+			return {
+				prompt_tokens: parsed.prompt_tokens,
+				completion_tokens: parsed.completion_tokens,
+				total_tokens: parsed.total_tokens,
+				prompt_tokens_details: // Any truthy object is passed through unvalidated (cast only); otherwise default to zero cached tokens.
+					parsed.prompt_tokens_details && typeof parsed.prompt_tokens_details === 'object'
+						? (parsed.prompt_tokens_details as APIUsage['prompt_tokens_details'])
+						: { cached_tokens: 0 },
+			};
+		}
+	} catch {
+		// Ignore DataParts that are not usage chunks: payloads that fail JSON.parse, or parse to `null` (property access throws).
+	}
+
+	return undefined; // Reached when parsing threw or the payload lacked the three numeric token counts.
+}
+
 export class ExtensionContributedChatEndpoint implements IChatEndpoint {
 	private readonly _maxTokens: number;
 	public readonly isDefault: boolean = false;
@@ -204,6 +235,7 @@ export class ExtensionContributedChatEndpoint implements IChatEndpoint {
 			const response = await this.languageModel.sendRequest(vscodeMessages, vscodeOptions, token);
 			let text = '';
 			let numToolsCalled = 0;
+			let usage: APIUsage | undefined;
 			const requestId = ourRequestId;
 
 			// consume stream
@@ -231,6 +263,11 @@ export class ExtensionContributedChatEndpoint implements IChatEndpoint {
 						const contextManagement = JSON.parse(new TextDecoder().decode(chunk.data)) as ContextManagementResponse;
 						await streamRecorder.callback?.(text, 0, { text: '', contextManagement });
 					}
+					// Mirror native usage-only chunk handling by shape, not MIME.
+					const maybeUsage = usageFromDataPart(chunk);
+					if (maybeUsage) {
+						usage = maybeUsage;
+					}
 				} else if (chunk instanceof vscode.LanguageModelThinkingPart) {
 					if (streamRecorder.callback) {
 						await streamRecorder.callback(text, 0, {
@@ -250,7 +287,7 @@ export class ExtensionContributedChatEndpoint implements IChatEndpoint {
 					type: ChatFetchResponseType.Success,
 					requestId,
 					serverRequestId: requestId,
-					usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0, prompt_tokens_details: { cached_tokens: 0 } },
+					usage: usage ?? { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0, prompt_tokens_details: { cached_tokens: 0 } },
 					value: text,
 					resolvedModel: this.languageModel.id
 				};