Skip to content
This repository was archived by the owner on May 20, 2026. It is now read-only.

Commit 4a31c97

Browse files
committed
Gate Responses API compaction behavior and telemetry
1 parent b674b42 commit 4a31c97

5 files changed

Lines changed: 435 additions & 21 deletions

File tree

src/extension/externalAgents/node/oaiLanguageModelServer.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import type OpenAI from 'openai';
1010
import { IChatMLFetcher, Source } from '../../../platform/chat/common/chatMLFetcher';
1111
import { ChatLocation, ChatResponse } from '../../../platform/chat/common/commonTypes';
1212
import { CustomModel, EndpointEditToolName, IEndpointProvider } from '../../../platform/endpoint/common/endpointProvider';
13-
import { OpenAIResponsesProcessor, responseApiInputToRawMessagesForLogging } from '../../../platform/endpoint/node/responsesApi';
13+
import { getResponsesApiCompactionThresholdFromBody, OpenAIResponsesProcessor, responseApiInputToRawMessagesForLogging } from '../../../platform/endpoint/node/responsesApi';
1414
import { ILogService } from '../../../platform/log/common/logService';
1515
import { FinishedCallback, OptionalChatRequestParams } from '../../../platform/networking/common/fetch';
1616
import { Response } from '../../../platform/networking/common/fetcherService';
@@ -455,7 +455,7 @@ class StreamingPassThroughEndpoint implements IChatEndpoint {
455455
// We parse the stream just to return a correct ChatCompletion for logging the response and token usage details.
456456
const requestId = response.headers.get('X-Request-ID') ?? generateUuid();
457457
const ghRequestId = response.headers.get('x-github-request-id') ?? '';
458-
const processor = this.instantiationService.createInstance(OpenAIResponsesProcessor, telemetryData, requestId, ghRequestId, (message: string) => logService.info(message));
458+
const processor = this.instantiationService.createInstance(OpenAIResponsesProcessor, telemetryData, telemetryService, requestId, ghRequestId, (message: string) => logService.info(message), getResponsesApiCompactionThresholdFromBody(this.requestBody));
459459
const parser = new SSEParser((ev) => {
460460
try {
461461
logService.trace(`[StreamingPassThroughEndpoint] SSE: ${ev.data}`);

src/extension/prompt/node/chatMLFetcher.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ import { IInteractionService } from '../../../platform/chat/common/interactionSe
1717
import { ConfigKey, HARD_TOOL_LIMIT, IConfigurationService } from '../../../platform/configuration/common/configurationService';
1818
import { ICAPIClientService } from '../../../platform/endpoint/common/capiClient';
1919
import { isAutoModel } from '../../../platform/endpoint/node/autoChatEndpoint';
20-
import { OpenAIResponsesProcessor, responseApiInputToRawMessagesForLogging, sendCompletionOutputTelemetry } from '../../../platform/endpoint/node/responsesApi';
20+
import { getResponsesApiCompactionThresholdFromBody, OpenAIResponsesProcessor, responseApiInputToRawMessagesForLogging, sendCompletionOutputTelemetry } from '../../../platform/endpoint/node/responsesApi';
2121
import { collectSingleLineErrorMessage, ILogService } from '../../../platform/log/common/logService';
2222
import { isAnthropicToolSearchEnabled } from '../../../platform/networking/common/anthropic';
2323
import { FinishedCallback, getRequestId, IResponseDelta, OptionalChatRequestParams, RequestId } from '../../../platform/networking/common/fetch';
@@ -1093,7 +1093,7 @@ export class ChatMLFetcherImpl extends AbstractChatMLFetcher {
10931093
const handle = connection.sendRequest(request, { userInitiated: !!userInitiatedRequest, turnId }, cancellationToken);
10941094

10951095
const extendedBaseTelemetryData = baseTelemetryData.extendedBy({ modelCallId });
1096-
const processor = this._instantiationService.createInstance(OpenAIResponsesProcessor, extendedBaseTelemetryData, modelRequestId.headerRequestId, modelRequestId.gitHubRequestId, (message: string) => this._logService.info(message));
1096+
const processor = this._instantiationService.createInstance(OpenAIResponsesProcessor, extendedBaseTelemetryData, this._telemetryService, modelRequestId.headerRequestId, modelRequestId.gitHubRequestId, (message: string) => this._logService.info(message), getResponsesApiCompactionThresholdFromBody(request));
10971097

10981098
// Set up streaming first so event listeners are registered before we
10991099
// await the first event — AsyncIterableObject runs its executor eagerly.

src/platform/endpoint/node/responsesApi.ts

Lines changed: 53 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ import { ILogService } from '../../log/common/logService';
1919
import { FinishedCallback, IResponseDelta, OpenAiResponsesFunctionTool } from '../../networking/common/fetch';
2020
import { IChatEndpoint, ICreateEndpointBodyOptions, IEndpointBody } from '../../networking/common/networking';
2121
import { ChatCompletion, FinishedCompletionReason, modelsWithoutResponsesContextManagement, openAIContextManagementCompactionType, OpenAIContextManagementResponse, rawMessageToCAPI, TokenLogProb } from '../../networking/common/openai';
22-
import { sendEngineMessagesTelemetry } from '../../networking/node/chatStream';
22+
import { sendEngineMessagesTelemetry, sendResponsesApiCompactionTelemetry } from '../../networking/node/chatStream';
2323
import { IExperimentationService } from '../../telemetry/common/nullExperimentationService';
2424
import { ITelemetryService } from '../../telemetry/common/telemetry';
2525
import { TelemetryData } from '../../telemetry/common/telemetryData';
@@ -35,22 +35,21 @@ export function getResponsesApiCompactionThreshold(configService: IConfiguration
3535
return undefined;
3636
}
3737

38-
return 1000;
39-
40-
// return endpoint.modelMaxPromptTokens > 0
41-
// ? Math.floor(endpoint.modelMaxPromptTokens * 0.9)
42-
// : 50000;
38+
return endpoint.modelMaxPromptTokens > 0
39+
? Math.floor(endpoint.modelMaxPromptTokens * 0.9)
40+
: 50000;
4341
}
4442

4543
export function createResponsesRequestBody(accessor: ServicesAccessor, options: ICreateEndpointBodyOptions, model: string, endpoint: IChatEndpoint): IEndpointBody {
4644
const configService = accessor.get(IConfigurationService);
4745
const expService = accessor.get(IExperimentationService);
4846
const verbosity = getVerbosityForModelSync(endpoint);
47+
const compactThreshold = getResponsesApiCompactionThreshold(configService, expService, endpoint);
4948
// Compaction is supported for all models, but works well for codex models and any future models after 5.3.
5049

5150
const body: IEndpointBody = {
5251
model,
53-
...rawMessagesToResponseAPI(model, options.messages, !!options.ignoreStatefulMarker),
52+
...rawMessagesToResponseAPI(model, options.messages, !!options.ignoreStatefulMarker, !!options.useWebSocket, compactThreshold !== undefined),
5453
stream: true,
5554
tools: options.requestOptions?.tools?.map((tool): OpenAI.Responses.FunctionTool & OpenAiResponsesFunctionTool => ({
5655
...tool.function,
@@ -69,7 +68,6 @@ export function createResponsesRequestBody(accessor: ServicesAccessor, options:
6968
text: verbosity ? { verbosity } : undefined,
7069
};
7170

72-
const compactThreshold = getResponsesApiCompactionThreshold(configService, expService, endpoint);
7371
if (compactThreshold !== undefined) {
7472
body.context_management = [{
7573
'type': openAIContextManagementCompactionType,
@@ -103,6 +101,21 @@ export function createResponsesRequestBody(accessor: ServicesAccessor, options:
103101
return body;
104102
}
105103

104+
/**
 * Reads the compaction threshold back out of an already-built request body.
 *
 * Scans `body.context_management` in order and returns the `compact_threshold`
 * of the first entry whose `type` equals `openAIContextManagementCompactionType`
 * and whose `compact_threshold` is a number.
 *
 * @param body Request body (only `context_management` is inspected).
 * @returns The numeric threshold, or `undefined` when `context_management` is
 * not an array or contains no matching entry.
 */
export function getResponsesApiCompactionThresholdFromBody(body: Pick<IEndpointBody, 'context_management'>): number | undefined {
105+
const contextManagement = body.context_management;
106+
if (!Array.isArray(contextManagement)) {
107+
return undefined;
108+
}
109+
110+
for (const item of contextManagement) {
111+
if (item.type === openAIContextManagementCompactionType && typeof item.compact_threshold === 'number') {
112+
return item.compact_threshold;
113+
}
114+
}
115+
116+
return undefined;
117+
}
118+
106119
type ResponseOutputMessageWithPhase = OpenAI.Responses.ResponseOutputMessage & {
107120
phase?: string;
108121
};
@@ -111,21 +124,23 @@ interface ResponseOutputItemWithPhase {
111124
phase?: string;
112125
}
113126

114-
function rawMessagesToResponseAPI(modelId: string, messages: readonly Raw.ChatMessage[], ignoreStatefulMarker: boolean): { input: OpenAI.Responses.ResponseInputItem[]; previous_response_id?: string } {
127+
function rawMessagesToResponseAPI(modelId: string, messages: readonly Raw.ChatMessage[], ignoreStatefulMarker: boolean, useWebSocket: boolean, compactionEnabled: boolean): { input: OpenAI.Responses.ResponseInputItem[]; previous_response_id?: string } {
115128
const latestCompactionMessageIndex = getLatestCompactionMessageIndex(messages);
116-
if (latestCompactionMessageIndex !== undefined) {
117-
messages = messages.slice(latestCompactionMessageIndex);
118-
}
119-
120129
const statefulMarkerAndIndex = !ignoreStatefulMarker && getStatefulMarkerAndIndex(modelId, messages);
130+
121131
let previousResponseId: string | undefined;
122132
if (statefulMarkerAndIndex) {
123133
previousResponseId = statefulMarkerAndIndex.statefulMarker;
124-
if (latestCompactionMessageIndex === undefined) {
134+
// This is for BYOK scenarios, where gpt5.3+ models are currently not yet supported.
135+
if ((!useWebSocket || !compactionEnabled) && latestCompactionMessageIndex === undefined) {
125136
messages = messages.slice(statefulMarkerAndIndex.index + 1);
126137
}
127138
}
128139

140+
if (latestCompactionMessageIndex !== undefined) {
141+
messages = messages.slice(latestCompactionMessageIndex);
142+
}
143+
129144
const input: OpenAI.Responses.ResponseInputItem[] = [];
130145
for (const message of messages) {
131146
switch (message.role) {
@@ -442,7 +457,7 @@ export async function processResponseFromChatEndpoint(instantiationService: IIns
442457
return new AsyncIterableObject<ChatCompletion>(async feed => {
443458
const requestId = response.headers.get('X-Request-ID') ?? generateUuid();
444459
const ghRequestId = response.headers.get('x-github-request-id') ?? '';
445-
const processor = instantiationService.createInstance(OpenAIResponsesProcessor, telemetryData, requestId, ghRequestId, (message: string) => logService.info(message), compactionThreshold);
460+
const processor = instantiationService.createInstance(OpenAIResponsesProcessor, telemetryData, telemetryService, requestId, ghRequestId, (message: string) => logService.info(message), compactionThreshold);
446461
const parser = new SSEParser((ev) => {
447462
try {
448463
logService.trace(`SSE: ${ev.data}`);
@@ -491,6 +506,7 @@ export class OpenAIResponsesProcessor {
491506

492507
constructor(
493508
private readonly telemetryData: TelemetryData,
509+
private readonly telemetryService: ITelemetryService,
494510
private readonly requestId: string,
495511
private readonly ghRequestId: string,
496512
private readonly logInfo: (message: string) => void,
@@ -605,10 +621,31 @@ export class OpenAIResponsesProcessor {
605621
}
606622
});
607623
case 'response.completed':
608-
if (this.sawCompactionMessage) {
624+
if (this.compactionThreshold !== undefined && this.sawCompactionMessage) {
625+
sendResponsesApiCompactionTelemetry(this.telemetryService, {
626+
outcome: 'compaction_returned',
627+
headerRequestId: this.requestId,
628+
gitHubRequestId: this.ghRequestId,
629+
model: chunk.response.model,
630+
}, {
631+
compactThreshold: this.compactionThreshold,
632+
promptTokens: chunk.response.usage?.input_tokens ?? 0,
633+
totalTokens: chunk.response.usage?.total_tokens ?? 0,
634+
});
609635
this.logInfo(`[responsesAPI_compaction] OpenAI returned compaction item. headerRequestId=${this.requestId} ghRequestId=${this.ghRequestId || 'unknown'} completionId=${chunk.response.id} createdAt=${chunk.response.created_at} compactionMessageId=${this.compactionMessageId ?? 'unknown'} compactThreshold=${this.compactionThreshold ?? -1} promptTokens=${chunk.response.usage?.input_tokens ?? 0} totalTokens=${chunk.response.usage?.total_tokens ?? 0}`);
610636
} else if (this.compactionThreshold !== undefined && (chunk.response.usage?.input_tokens ?? 0) >= this.compactionThreshold) {
611637
const outputTypes = chunk.response.output.map(item => item.type).join(',');
638+
sendResponsesApiCompactionTelemetry(this.telemetryService, {
639+
outcome: 'threshold_met_no_compaction',
640+
headerRequestId: this.requestId,
641+
gitHubRequestId: this.ghRequestId,
642+
model: chunk.response.model,
643+
outputTypes: outputTypes || 'none',
644+
}, {
645+
compactThreshold: this.compactionThreshold,
646+
promptTokens: chunk.response.usage?.input_tokens ?? 0,
647+
totalTokens: chunk.response.usage?.total_tokens ?? 0,
648+
});
612649
this.logInfo(`[responsesAPI_compaction] Context management is enabled and compact threshold was met, but no compaction item was returned in the response output. headerRequestId=${this.requestId} ghRequestId=${this.ghRequestId || 'unknown'} completionId=${chunk.response.id} createdAt=${chunk.response.created_at} compactThreshold=${this.compactionThreshold} promptTokens=${chunk.response.usage?.input_tokens ?? 0} totalTokens=${chunk.response.usage?.total_tokens ?? 0} outputTypes=${outputTypes || 'none'}`);
613650
}
614651
onProgress({ text: '', statefulMarker: chunk.response.id });

0 commit comments

Comments
 (0)