Skip to content
This repository was archived by the owner on May 20, 2026. It is now read-only.

Commit 17426d7

Browse files
authored
Add APIUsage tracking to extChatEndpoint
fix(ext-endpoint): propagate usage from DataPart stream in ExtensionContributedChatEndpoint

The extension-contributed chat endpoint previously returned a hardcoded zeroed-out usage object on every successful response, causing the Copilot Chat context-window meter to never update when routing through third-party language model providers.

Add a `usageFromDataPart()` helper that checks incoming `LanguageModelDataPart` chunks for a native usage shape (`prompt_tokens`, `completion_tokens`, `total_tokens` — all numbers) without keying on any MIME type. This mirrors the shape-based check already present in the non-provider (internal) streaming path.

In `makeChatRequest2()`:
- Declare `usage: APIUsage | undefined` before the stream loop.
- After handling known MIME types (StatefulMarker, ContextManagement), call `usageFromDataPart()` on every DataPart and capture the result.
- Return `usage ?? <zeroed fallback>` so existing behaviour is preserved when no usage chunk arrives.

No new MIME branch is introduced; any provider that emits a DataPart whose JSON payload matches the OpenAI usage shape will automatically populate the meter.
1 parent 9e668cb commit 17426d7

1 file changed

Lines changed: 39 additions & 2 deletions

File tree

src/platform/endpoint/vscode-node/extChatEndpoint.ts

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ import { ContextManagementResponse } from '../../networking/common/anthropic';
1818
import { FinishedCallback, OpenAiFunctionTool, OptionalChatRequestParams } from '../../networking/common/fetch';
1919
import { Response } from '../../networking/common/fetcherService';
2020
import { IChatEndpoint, ICreateEndpointBodyOptions, IEndpointBody, IMakeChatRequestOptions } from '../../networking/common/networking';
21-
import { ChatCompletion } from '../../networking/common/openai';
21+
import { APIUsage, ChatCompletion } from '../../networking/common/openai';
2222
import { IOTelService } from '../../otel/common/otelService';
2323
import { retrieveCapturingTokenByCorrelation, storeCapturingTokenForCorrelation } from '../../requestLogger/node/requestLogger';
2424
import { ITelemetryService } from '../../telemetry/common/telemetry';
@@ -37,6 +37,37 @@ enum ChatImageMimeType {
3737
BMP = 'image/bmp',
3838
}
3939

40+
/**
 * Tries to read an OpenAI-style token usage record out of a language model data part.
 *
 * The part's bytes are decoded as text and parsed as JSON. The payload counts as usage
 * only when `prompt_tokens`, `completion_tokens` and `total_tokens` are all finite,
 * non-negative numbers. The part's MIME type is deliberately not consulted, so any
 * payload with the right shape is accepted.
 *
 * @param part Data part whose `data` bytes may contain a JSON usage payload.
 * @returns The usage record with a normalised `prompt_tokens_details` (always carrying a
 * numeric `cached_tokens`), or `undefined` when the payload is not JSON or does not have
 * the usage shape. Never throws.
 */
function usageFromDataPart(part: vscode.LanguageModelDataPart): APIUsage | undefined {
	const isRecord = (value: unknown): value is Record<string, unknown> =>
		typeof value === 'object' && value !== null && !Array.isArray(value);

	// JSON.parse can yield Infinity (e.g. `1e999`); a token count must be a real, non-negative number.
	const isTokenCount = (value: unknown): value is number =>
		typeof value === 'number' && Number.isFinite(value) && value >= 0;

	let parsed: unknown;
	try {
		parsed = JSON.parse(new TextDecoder().decode(part.data));
	} catch {
		// Non-JSON payloads (binary data, plain text, ...) are simply not usage chunks.
		return undefined;
	}

	if (!isRecord(parsed)) {
		return undefined;
	}

	const { prompt_tokens, completion_tokens, total_tokens, prompt_tokens_details } = parsed;
	if (!isTokenCount(prompt_tokens) || !isTokenCount(completion_tokens) || !isTokenCount(total_tokens)) {
		return undefined;
	}

	// Keep any extra detail fields the provider sent, but guarantee a numeric `cached_tokens`
	// so consumers never read `undefined` (or a non-number) from it.
	const details = isRecord(prompt_tokens_details)
		? {
			...prompt_tokens_details,
			cached_tokens: isTokenCount(prompt_tokens_details.cached_tokens) ? prompt_tokens_details.cached_tokens : 0,
		}
		: { cached_tokens: 0 };

	return {
		prompt_tokens,
		completion_tokens,
		total_tokens,
		prompt_tokens_details: details as APIUsage['prompt_tokens_details'],
	};
}
70+
4071
export class ExtensionContributedChatEndpoint implements IChatEndpoint {
4172
private readonly _maxTokens: number;
4273
public readonly isDefault: boolean = false;
@@ -204,6 +235,7 @@ export class ExtensionContributedChatEndpoint implements IChatEndpoint {
204235
const response = await this.languageModel.sendRequest(vscodeMessages, vscodeOptions, token);
205236
let text = '';
206237
let numToolsCalled = 0;
238+
let usage: APIUsage | undefined;
207239
const requestId = ourRequestId;
208240

209241
// consume stream
@@ -231,6 +263,11 @@ export class ExtensionContributedChatEndpoint implements IChatEndpoint {
231263
const contextManagement = JSON.parse(new TextDecoder().decode(chunk.data)) as ContextManagementResponse;
232264
await streamRecorder.callback?.(text, 0, { text: '', contextManagement });
233265
}
266+
// Mirror native usage-only chunk handling by shape, not MIME.
267+
const maybeUsage = usageFromDataPart(chunk);
268+
if (maybeUsage) {
269+
usage = maybeUsage;
270+
}
234271
} else if (chunk instanceof vscode.LanguageModelThinkingPart) {
235272
if (streamRecorder.callback) {
236273
await streamRecorder.callback(text, 0, {
@@ -250,7 +287,7 @@ export class ExtensionContributedChatEndpoint implements IChatEndpoint {
250287
type: ChatFetchResponseType.Success,
251288
requestId,
252289
serverRequestId: requestId,
253-
usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0, prompt_tokens_details: { cached_tokens: 0 } },
290+
usage: usage ?? { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0, prompt_tokens_details: { cached_tokens: 0 } },
254291
value: text,
255292
resolvedModel: this.languageModel.id
256293
};

0 commit comments

Comments
 (0)