Skip to content

Commit 1561ab6

Browse files
Merge branch 'master' of github.com:apache/apisix into cleanup-ai-proxy
2 parents 7aa3f24 + 428f43a commit 1561ab6

File tree

6 files changed

+190
-20
lines changed

6 files changed

+190
-20
lines changed

apisix/plugins/ai-proxy-multi.lua

Lines changed: 30 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ local pcall = pcall
2525
local ipairs = ipairs
2626
local type = type
2727

28-
local internal_server_error = ngx.HTTP_INTERNAL_SERVER_ERROR
2928
local priority_balancer = require("apisix.balancer.priority")
3029

3130
local pickers = {}
@@ -157,30 +156,48 @@ local function pick_target(ctx, conf, ups_tab)
157156
create_server_picker, conf, ups_tab)
158157
end
159158
if not server_picker then
160-
return internal_server_error, "failed to fetch server picker"
159+
return nil, nil, "failed to fetch server picker"
161160
end
161+
ctx.server_picker = server_picker
162162

163-
local instance_name = server_picker.get(ctx)
164-
local instance_conf = get_instance_conf(conf.instances, instance_name)
165-
163+
local instance_name, err = server_picker.get(ctx)
164+
if err then
165+
return nil, nil, err
166+
end
166167
ctx.balancer_server = instance_name
167-
ctx.server_picker = server_picker
168+
if conf.fallback_strategy == "instance_health_and_rate_limiting" then
169+
local ai_rate_limiting = require("apisix.plugins.ai-rate-limiting")
170+
for _ = 1, #conf.instances do
171+
if ai_rate_limiting.check_instance_status(nil, ctx, instance_name) then
172+
break
173+
end
174+
core.log.info("ai instance: ", instance_name,
175+
" is not available, try to pick another one")
176+
server_picker.after_balance(ctx, true)
177+
instance_name, err = server_picker.get(ctx)
178+
if err then
179+
return nil, nil, err
180+
end
181+
ctx.balancer_server = instance_name
182+
end
183+
end
168184

185+
local instance_conf = get_instance_conf(conf.instances, instance_name)
169186
return instance_name, instance_conf
170187
end
171188

172189

173190
local function pick_ai_instance(ctx, conf, ups_tab)
174-
local instance_name, instance_conf
191+
local instance_name, instance_conf, err
175192
if #conf.instances == 1 then
176193
instance_name = conf.instances[1].name
177194
instance_conf = conf.instances[1]
178195
else
179-
instance_name, instance_conf = pick_target(ctx, conf, ups_tab)
196+
instance_name, instance_conf, err = pick_target(ctx, conf, ups_tab)
180197
end
181198

182199
core.log.info("picked instance: ", instance_name)
183-
return instance_name, instance_conf
200+
return instance_name, instance_conf, err
184201
end
185202

186203

@@ -194,7 +211,10 @@ function _M.access(conf, ctx)
194211
ups_tab["hash_on"] = hash_on
195212
end
196213

197-
local name, ai_instance = pick_ai_instance(ctx, conf, ups_tab)
214+
local name, ai_instance, err = pick_ai_instance(ctx, conf, ups_tab)
215+
if err then
216+
return 503, err
217+
end
198218
ctx.picked_ai_instance_name = name
199219
ctx.picked_ai_instance = ai_instance
200220
ctx.bypass_nginx_upstream = true

apisix/plugins/ai-proxy/schema.lua

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ _M.ai_proxy_schema = {
140140
type = "integer",
141141
minimum = 1,
142142
maximum = 60000,
143-
default = 3000,
143+
default = 30000,
144144
description = "timeout in milliseconds",
145145
},
146146
keepalive = {type = "boolean", default = true},
@@ -188,11 +188,16 @@ _M.ai_proxy_multi_schema = {
188188
default = { algorithm = "roundrobin" }
189189
},
190190
instances = ai_instance_schema,
191+
fallback_strategy = {
192+
type = "string",
193+
enum = { "instance_health_and_rate_limiting" },
194+
default = "instance_health_and_rate_limiting",
195+
},
191196
timeout = {
192197
type = "integer",
193198
minimum = 1,
194199
maximum = 60000,
195-
default = 3000,
200+
default = 30000,
196201
description = "timeout in milliseconds",
197202
},
198203
keepalive = {type = "boolean", default = true},

apisix/plugins/ai-rate-limiting.lua

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -143,9 +143,9 @@ end
143143
function _M.check_instance_status(conf, ctx, instance_name)
144144
if conf == nil then
145145
local plugins = ctx.plugins
146-
for _, plugin in ipairs(plugins) do
147-
if plugin.name == plugin_name then
148-
conf = plugin
146+
for i = 1, #plugins, 2 do
147+
if plugins[i]["name"] == plugin_name then
148+
conf = plugins[i + 1]
149149
end
150150
end
151151
end

docs/en/latest/plugins/ai-proxy-multi.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ Proxying requests to OpenAI is supported now. Other LLM services will be support
7070
| provider.options.top_p | No | number | Defines the top-p probability mass (0 - 1) for nucleus sampling. | |
7171
| provider.options.stream | No | boolean | Enables streaming responses via SSE. | |
7272
| provider.override.endpoint | No | string | Custom host override for the AI provider. | |
73-
| timeout | No | integer | Request timeout in milliseconds (1-60000). | 3000 |
73+
| timeout | No | integer | Request timeout in milliseconds (1-60000). | 30000 |
7474
| keepalive | No | boolean | Enables keepalive connections. | true |
7575
| keepalive_timeout | No | integer | Timeout for keepalive connections (minimum 1000ms). | 60000 |
7676
| keepalive_pool | No | integer | Maximum keepalive connections. | 30 |

docs/en/latest/plugins/ai-proxy.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ Proxying requests to OpenAI is supported now. Other LLM services will be support
6363
| model.options.top_p | No | Number | Top-p probability mass. Range: 0 - 1 |
6464
| model.options.stream | No | Boolean | Stream response by SSE. |
6565
| override.endpoint | No | String | Override the endpoint of the AI provider |
66-
| timeout | No | Integer | Timeout in milliseconds for requests to LLM. Range: 1 - 60000. Default: 3000 |
66+
| timeout | No | Integer | Timeout in milliseconds for requests to LLM. Range: 1 - 60000. Default: 30000 |
6767
| keepalive | No | Boolean | Enable keepalive for requests to LLM. Default: true |
6868
| keepalive_timeout | No | Integer | Keepalive timeout in milliseconds for requests to LLM. Minimum: 1000. Default: 60000 |
6969
| keepalive_pool | No | Integer | Keepalive pool size for requests to LLM. Minimum: 1. Default: 30 |

t/plugin/ai-rate-limiting.t

Lines changed: 148 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ add_block_preprocessor(sub {
116116
end
117117
118118
ngx.status = 200
119-
ngx.say([[
119+
ngx.say(string.format([[
120120
{
121121
"choices": [
122122
{
@@ -127,12 +127,12 @@ add_block_preprocessor(sub {
127127
],
128128
"created": 1723780938,
129129
"id": "chatcmpl-9wiSIg5LYrrpxwsr2PubSQnbtod1P",
130-
"model": "gpt-4o-2024-05-13",
130+
"model": "%s",
131131
"object": "chat.completion",
132132
"system_fingerprint": "fp_abc28019ad",
133133
"usage": { "completion_tokens": 5, "prompt_tokens": 8, "total_tokens": 10 }
134134
}
135-
]])
135+
]], body.model))
136136
return
137137
end
138138
@@ -537,3 +537,148 @@ Authorization: Bearer token
537537
Authorization: Bearer token
538538
--- error_code eval
539539
[200, 200, 200, 200, 200, 200, 200, 403, 503]
540+
541+
542+
543+
=== TEST 13: ai-rate-limiting & ai-proxy-multi, with instance_health_and_rate_limiting strategy
544+
--- config
545+
location /t {
546+
content_by_lua_block {
547+
local t = require("lib.test_admin").test
548+
local code, body = t('/apisix/admin/routes/1',
549+
ngx.HTTP_PUT,
550+
[[{
551+
"uri": "/ai",
552+
"plugins": {
553+
"ai-proxy-multi": {
554+
"fallback_strategy": "instance_health_and_rate_limiting",
555+
"instances": [
556+
{
557+
"name": "openai-gpt4",
558+
"provider": "openai",
559+
"weight": 1,
560+
"priority": 1,
561+
"auth": {
562+
"header": {
563+
"Authorization": "Bearer token"
564+
}
565+
},
566+
"options": {
567+
"model": "gpt-4"
568+
},
569+
"override": {
570+
"endpoint": "http://localhost:16724"
571+
}
572+
},
573+
{
574+
"name": "openai-gpt3",
575+
"provider": "openai",
576+
"weight": 1,
577+
"priority": 0,
578+
"auth": {
579+
"header": {
580+
"Authorization": "Bearer token"
581+
}
582+
},
583+
"options": {
584+
"model": "gpt-3"
585+
},
586+
"override": {
587+
"endpoint": "http://localhost:16724"
588+
}
589+
}
590+
],
591+
"ssl_verify": false
592+
},
593+
"ai-rate-limiting": {
594+
"limit": 10,
595+
"time_window": 60
596+
}
597+
},
598+
"upstream": {
599+
"type": "roundrobin",
600+
"nodes": {
601+
"canbeanything.com": 1
602+
}
603+
}
604+
}]]
605+
)
606+
607+
if code >= 300 then
608+
ngx.status = code
609+
end
610+
ngx.say(body)
611+
}
612+
}
613+
--- response_body
614+
passed
615+
616+
617+
618+
=== TEST 14: fallback strategy should work
619+
--- config
620+
location /t {
621+
content_by_lua_block {
622+
local t = require("lib.test_admin").test
623+
local core = require("apisix.core")
624+
local code, _, body = t("/ai",
625+
ngx.HTTP_POST,
626+
[[{
627+
"messages": [
628+
{ "role": "system", "content": "You are a mathematician" },
629+
{ "role": "user", "content": "What is 1+1?" }
630+
]
631+
}]],
632+
nil,
633+
{
634+
["test-type"] = "options",
635+
["Content-Type"] = "application/json",
636+
}
637+
)
638+
639+
assert(code == 200, "first request should be successful")
640+
assert(core.string.find(body, "gpt-4"),
641+
"first request should be handled by higher priority instance")
642+
643+
local code, _, body = t("/ai",
644+
ngx.HTTP_POST,
645+
[[{
646+
"messages": [
647+
{ "role": "system", "content": "You are a mathematician" },
648+
{ "role": "user", "content": "What is 1+1?" }
649+
]
650+
}]],
651+
nil,
652+
{
653+
["test-type"] = "options",
654+
["Content-Type"] = "application/json",
655+
}
656+
)
657+
658+
assert(code == 200, "second request should be successful")
659+
assert(core.string.find(body, "gpt-3"),
660+
"second request should be handled by lower priority instance")
661+
662+
local code, body = t("/ai",
663+
ngx.HTTP_POST,
664+
[[{
665+
"messages": [
666+
{ "role": "system", "content": "You are a mathematician" },
667+
{ "role": "user", "content": "What is 1+1?" }
668+
]
669+
}]],
670+
nil,
671+
{
672+
["test-type"] = "options",
673+
["Content-Type"] = "application/json",
674+
}
675+
)
676+
677+
assert(code == 503, "third request should be failed")
678+
assert(core.string.find(body, "all servers tried"), "all servers tried")
679+
680+
ngx.say("passed")
681+
}
682+
}
683+
--- response_body
684+
passed

0 commit comments

Comments
 (0)