@@ -19,7 +19,7 @@ import { ILogService } from '../../log/common/logService';
1919import { FinishedCallback , IResponseDelta , OpenAiResponsesFunctionTool } from '../../networking/common/fetch' ;
2020import { IChatEndpoint , ICreateEndpointBodyOptions , IEndpointBody } from '../../networking/common/networking' ;
2121import { ChatCompletion , FinishedCompletionReason , modelsWithoutResponsesContextManagement , openAIContextManagementCompactionType , OpenAIContextManagementResponse , rawMessageToCAPI , TokenLogProb } from '../../networking/common/openai' ;
22- import { sendEngineMessagesTelemetry } from '../../networking/node/chatStream' ;
22+ import { sendEngineMessagesTelemetry , sendResponsesApiCompactionTelemetry } from '../../networking/node/chatStream' ;
2323import { IExperimentationService } from '../../telemetry/common/nullExperimentationService' ;
2424import { ITelemetryService } from '../../telemetry/common/telemetry' ;
2525import { TelemetryData } from '../../telemetry/common/telemetryData' ;
@@ -35,22 +35,21 @@ export function getResponsesApiCompactionThreshold(configService: IConfiguration
3535 return undefined ;
3636 }
3737
38- return 1000 ;
39-
40- // return endpoint.modelMaxPromptTokens > 0
41- // ? Math.floor(endpoint.modelMaxPromptTokens * 0.9)
42- // : 50000;
38+ return endpoint . modelMaxPromptTokens > 0
39+ ? Math . floor ( endpoint . modelMaxPromptTokens * 0.9 )
40+ : 50000 ;
4341}
4442
4543export function createResponsesRequestBody ( accessor : ServicesAccessor , options : ICreateEndpointBodyOptions , model : string , endpoint : IChatEndpoint ) : IEndpointBody {
4644 const configService = accessor . get ( IConfigurationService ) ;
4745 const expService = accessor . get ( IExperimentationService ) ;
4846 const verbosity = getVerbosityForModelSync ( endpoint ) ;
47+ const compactThreshold = getResponsesApiCompactionThreshold ( configService , expService , endpoint ) ;
4948 // Compaction is supported for all models, but works well for codex models and any future models after 5.3.
5049
5150 const body : IEndpointBody = {
5251 model,
53- ...rawMessagesToResponseAPI ( model , options . messages , ! ! options . ignoreStatefulMarker ) ,
52+ ...rawMessagesToResponseAPI ( model , options . messages , ! ! options . ignoreStatefulMarker , ! ! options . useWebSocket , compactThreshold !== undefined ) ,
5453 stream : true ,
5554 tools : options . requestOptions ?. tools ?. map ( ( tool ) : OpenAI . Responses . FunctionTool & OpenAiResponsesFunctionTool => ( {
5655 ...tool . function ,
@@ -69,7 +68,6 @@ export function createResponsesRequestBody(accessor: ServicesAccessor, options:
6968 text : verbosity ? { verbosity } : undefined ,
7069 } ;
7170
72- const compactThreshold = getResponsesApiCompactionThreshold ( configService , expService , endpoint ) ;
7371 if ( compactThreshold !== undefined ) {
7472 body . context_management = [ {
7573 'type' : openAIContextManagementCompactionType ,
@@ -103,6 +101,21 @@ export function createResponsesRequestBody(accessor: ServicesAccessor, options:
103101 return body ;
104102}
105103
/**
 * Reads the compaction threshold back out of an already-built request body.
 *
 * Scans `body.context_management` in order and returns the
 * `compact_threshold` of the first entry whose `type` equals
 * `openAIContextManagementCompactionType` and whose threshold is a number.
 *
 * @param body Request body; only its `context_management` property is read.
 * @returns The numeric threshold, or `undefined` when `context_management`
 * is not an array or contains no matching entry.
 */
export function getResponsesApiCompactionThresholdFromBody(body: Pick<IEndpointBody, 'context_management'>): number | undefined {
	const contextManagement = body.context_management;
	if (!Array.isArray(contextManagement)) {
		return undefined;
	}

	for (const item of contextManagement) {
		// Tolerate null/undefined holes instead of throwing on `item.type`.
		if (item == null) {
			continue;
		}

		if (item.type === openAIContextManagementCompactionType && typeof item.compact_threshold === 'number') {
			return item.compact_threshold;
		}
	}

	return undefined;
}
118+
/** The SDK's `ResponseOutputMessage` type intersected with an optional string `phase` property. */
106119type ResponseOutputMessageWithPhase = OpenAI . Responses . ResponseOutputMessage & {
107120 phase ?: string ;
108121} ;
@@ -111,21 +124,23 @@ interface ResponseOutputItemWithPhase {
111124 phase ?: string ;
112125}
113126
114- function rawMessagesToResponseAPI ( modelId : string , messages : readonly Raw . ChatMessage [ ] , ignoreStatefulMarker : boolean ) : { input : OpenAI . Responses . ResponseInputItem [ ] ; previous_response_id ?: string } {
127+ function rawMessagesToResponseAPI ( modelId : string , messages : readonly Raw . ChatMessage [ ] , ignoreStatefulMarker : boolean , useWebSocket : boolean , compactionEnabled : boolean ) : { input : OpenAI . Responses . ResponseInputItem [ ] ; previous_response_id ?: string } {
115128 const latestCompactionMessageIndex = getLatestCompactionMessageIndex ( messages ) ;
116- if ( latestCompactionMessageIndex !== undefined ) {
117- messages = messages . slice ( latestCompactionMessageIndex ) ;
118- }
119-
120129 const statefulMarkerAndIndex = ! ignoreStatefulMarker && getStatefulMarkerAndIndex ( modelId , messages ) ;
130+
121131 let previousResponseId : string | undefined ;
122132 if ( statefulMarkerAndIndex ) {
123133 previousResponseId = statefulMarkerAndIndex . statefulMarker ;
124- if ( latestCompactionMessageIndex === undefined ) {
134+ // This is for BYOK scenarios, where gpt5.3+ models are currently not yet supported.
135+ if ( ( ! useWebSocket || ! compactionEnabled ) && latestCompactionMessageIndex === undefined ) {
125136 messages = messages . slice ( statefulMarkerAndIndex . index + 1 ) ;
126137 }
127138 }
128139
140+ if ( latestCompactionMessageIndex !== undefined ) {
141+ messages = messages . slice ( latestCompactionMessageIndex ) ;
142+ }
143+
129144 const input : OpenAI . Responses . ResponseInputItem [ ] = [ ] ;
130145 for ( const message of messages ) {
131146 switch ( message . role ) {
@@ -442,7 +457,7 @@ export async function processResponseFromChatEndpoint(instantiationService: IIns
442457 return new AsyncIterableObject < ChatCompletion > ( async feed => {
443458 const requestId = response . headers . get ( 'X-Request-ID' ) ?? generateUuid ( ) ;
444459 const ghRequestId = response . headers . get ( 'x-github-request-id' ) ?? '' ;
445- const processor = instantiationService . createInstance ( OpenAIResponsesProcessor , telemetryData , requestId , ghRequestId , ( message : string ) => logService . info ( message ) , compactionThreshold ) ;
460+ const processor = instantiationService . createInstance ( OpenAIResponsesProcessor , telemetryData , telemetryService , requestId , ghRequestId , ( message : string ) => logService . info ( message ) , compactionThreshold ) ;
446461 const parser = new SSEParser ( ( ev ) => {
447462 try {
448463 logService . trace ( `SSE: ${ ev . data } ` ) ;
@@ -491,6 +506,7 @@ export class OpenAIResponsesProcessor {
491506
492507 constructor (
493508 private readonly telemetryData : TelemetryData ,
509+ private readonly telemetryService : ITelemetryService ,
494510 private readonly requestId : string ,
495511 private readonly ghRequestId : string ,
496512 private readonly logInfo : ( message : string ) => void ,
@@ -605,10 +621,31 @@ export class OpenAIResponsesProcessor {
605621 }
606622 } ) ;
607623 case 'response.completed' :
608- if ( this . sawCompactionMessage ) {
624+ if ( this . compactionThreshold !== undefined && this . sawCompactionMessage ) {
625+ sendResponsesApiCompactionTelemetry ( this . telemetryService , {
626+ outcome : 'compaction_returned' ,
627+ headerRequestId : this . requestId ,
628+ gitHubRequestId : this . ghRequestId ,
629+ model : chunk . response . model ,
630+ } , {
631+ compactThreshold : this . compactionThreshold ,
632+ promptTokens : chunk . response . usage ?. input_tokens ?? 0 ,
633+ totalTokens : chunk . response . usage ?. total_tokens ?? 0 ,
634+ } ) ;
609635 this . logInfo ( `[responsesAPI_compaction] OpenAI returned compaction item. headerRequestId=${ this . requestId } ghRequestId=${ this . ghRequestId || 'unknown' } completionId=${ chunk . response . id } createdAt=${ chunk . response . created_at } compactionMessageId=${ this . compactionMessageId ?? 'unknown' } compactThreshold=${ this . compactionThreshold ?? - 1 } promptTokens=${ chunk . response . usage ?. input_tokens ?? 0 } totalTokens=${ chunk . response . usage ?. total_tokens ?? 0 } ` ) ;
610636 } else if ( this . compactionThreshold !== undefined && ( chunk . response . usage ?. input_tokens ?? 0 ) >= this . compactionThreshold ) {
611637 const outputTypes = chunk . response . output . map ( item => item . type ) . join ( ',' ) ;
638+ sendResponsesApiCompactionTelemetry ( this . telemetryService , {
639+ outcome : 'threshold_met_no_compaction' ,
640+ headerRequestId : this . requestId ,
641+ gitHubRequestId : this . ghRequestId ,
642+ model : chunk . response . model ,
643+ outputTypes : outputTypes || 'none' ,
644+ } , {
645+ compactThreshold : this . compactionThreshold ,
646+ promptTokens : chunk . response . usage ?. input_tokens ?? 0 ,
647+ totalTokens : chunk . response . usage ?. total_tokens ?? 0 ,
648+ } ) ;
612649 this . logInfo ( `[responsesAPI_compaction] Context management is enabled and compact threshold was met, but no compaction item was returned in the response output. headerRequestId=${ this . requestId } ghRequestId=${ this . ghRequestId || 'unknown' } completionId=${ chunk . response . id } createdAt=${ chunk . response . created_at } compactThreshold=${ this . compactionThreshold } promptTokens=${ chunk . response . usage ?. input_tokens ?? 0 } totalTokens=${ chunk . response . usage ?. total_tokens ?? 0 } outputTypes=${ outputTypes || 'none' } ` ) ;
613650 }
614651 onProgress ( { text : '' , statefulMarker : chunk . response . id } ) ;
0 commit comments