@@ -141,22 +141,25 @@ end
141141function _M .read_response (ctx , res )
142142 local body_reader = res .body_reader
143143 if not body_reader then
144- core .log .error (" AI service sent no response body" )
144+ core .log .warn (" AI service sent no response body" )
145145 return 500
146146 end
147147
148148 local content_type = res .headers [" Content-Type" ]
149149 core .response .set_header (" Content-Type" , content_type )
150150
151- if core .string .find (content_type , " text/event-stream" ) then
151+ if content_type and core .string .find (content_type , " text/event-stream" ) then
152152 while true do
153153 local chunk , err = body_reader () -- will read chunk by chunk
154154 if err then
155- core .log .error (" failed to read response chunk: " , err )
156- break
155+ core .log .warn (" failed to read response chunk: " , err )
156+ if core .string .find (err , " timeout" ) then
157+ return 504
158+ end
159+ return 500
157160 end
158161 if not chunk then
159- break
162+ return
160163 end
161164
162165 ngx_print (chunk )
@@ -192,6 +195,8 @@ function _M.read_response(ctx, res)
192195
193196 -- usage field is null for non-last events, null is parsed as userdata type
194197 if data and data .usage and type (data .usage ) ~= " userdata" then
198+ core .log .info (" got token usage from ai service: " ,
199+ core .json .delay_encode (data .usage ))
195200 ctx .ai_token_usage = {
196201 prompt_tokens = data .usage .prompt_tokens or 0 ,
197202 completion_tokens = data .usage .completion_tokens or 0 ,
@@ -202,19 +207,22 @@ function _M.read_response(ctx, res)
202207
203208 :: CONTINUE::
204209 end
205- return
206210 end
207211
208212 local raw_res_body , err = res :read_body ()
209213 if not raw_res_body then
210- core .log .error (" failed to read response body: " , err )
214+ core .log .warn (" failed to read response body: " , err )
215+ if core .string .find (err , " timeout" ) then
216+ return 504
217+ end
211218 return 500
212219 end
213220 local res_body , err = core .json .decode (raw_res_body )
214221 if err then
215222 core .log .warn (" invalid response body from ai service: " , raw_res_body , " err: " , err ,
216223 " , it will cause token usage not available" )
217224 else
225+ core .log .info (" got token usage from ai service: " , core .json .delay_encode (res_body .usage ))
218226 ctx .ai_token_usage = {
219227 prompt_tokens = res_body .usage and res_body .usage .prompt_tokens or 0 ,
220228 completion_tokens = res_body .usage and res_body .usage .completion_tokens or 0 ,
0 commit comments