@@ -116,7 +116,7 @@ add_block_preprocessor(sub {
116116 end
117117
118118 ngx.status = 200
119- ngx.say([[
119+ ngx.say(string.format( [[
120120{
121121 "choices": [
122122 {
@@ -127,12 +127,12 @@ add_block_preprocessor(sub {
127127 ],
128128 "created": 1723780938,
129129 "id": "chatcmpl-9wiSIg5LYrrpxwsr2PubSQnbtod1P",
130- "model": "gpt-4o-2024-05-13 ",
130+ "model": "%s ",
131131 "object": "chat.completion",
132132 "system_fingerprint": "fp_abc28019ad",
133133 "usage": { "completion_tokens": 5, "prompt_tokens": 8, "total_tokens": 10 }
134134}
135- ]])
135+ ]], body.model) )
136136 return
137137 end
138138
@@ -537,3 +537,148 @@ Authorization: Bearer token
537537Authorization: Bearer token
538538--- error_code eval
539539[200, 200, 200, 200, 200, 200, 200, 403, 503]
540+
541+
542+
543+ === TEST 13: ai-rate-limiting & ai-proxy-multi, with instance_health_and_rate_limiting strategy
544+ --- config
545+ location /t {
546+ content_by_lua_block {
-- Setup step: PUT route 1 for uri /ai through /apisix/admin/routes/1.
-- The route enables ai-proxy-multi with two openai instances that point at
-- the same override endpoint (localhost:16724): openai-gpt4 (model gpt-4,
-- priority 1) and openai-gpt3 (model gpt-3, priority 0), using the
-- instance_health_and_rate_limiting fallback strategy.
-- It also enables ai-rate-limiting with limit 10 and time_window 60.
547+ local t = require("lib.test_admin").test
548+ local code, body = t('/apisix/admin/routes/1',
549+ ngx.HTTP_PUT,
550+ [[{
551+ "uri": "/ai",
552+ "plugins": {
553+ "ai-proxy-multi": {
554+ "fallback_strategy": "instance_health_and_rate_limiting",
555+ "instances": [
556+ {
557+ "name": "openai-gpt4",
558+ "provider": "openai",
559+ "weight": 1,
560+ "priority": 1,
561+ "auth": {
562+ "header": {
563+ "Authorization": "Bearer token"
564+ }
565+ },
566+ "options": {
567+ "model": "gpt-4"
568+ },
569+ "override": {
570+ "endpoint": "http://localhost:16724"
571+ }
572+ },
573+ {
574+ "name": "openai-gpt3",
575+ "provider": "openai",
576+ "weight": 1,
577+ "priority": 0,
578+ "auth": {
579+ "header": {
580+ "Authorization": "Bearer token"
581+ }
582+ },
583+ "options": {
584+ "model": "gpt-3"
585+ },
586+ "override": {
587+ "endpoint": "http://localhost:16724"
588+ }
589+ }
590+ ],
591+ "ssl_verify": false
592+ },
593+ "ai-rate-limiting": {
594+ "limit": 10,
595+ "time_window": 60
596+ }
597+ },
598+ "upstream": {
599+ "type": "roundrobin",
600+ "nodes": {
601+ "canbeanything.com": 1
602+ }
603+ }
604+ }]]
605+ )
606+
-- Propagate an error status (300 or above) from the PUT to the response of
-- this location; the body returned by the helper is always printed.
607+ if code >= 300 then
608+ ngx.status = code
609+ end
610+ ngx.say(body)
611+ }
612+ }
613+ --- response_body
614+ passed
615+
616+
617+
618+ === TEST 14: fallback strategy should work
619+ --- config
620+ location /t {
621+ content_by_lua_block {
-- Sends the same chat request to /ai three times and checks which model
-- name shows up in each response: gpt-4 first, gpt-3 second, then a 503.
-- NOTE(review): presumably each mocked reply uses up the whole
-- ai-rate-limiting budget of one instance (limit 10 in TEST 13), which is
-- what forces the move to the next instance; confirm against the mock
-- upstream and the plugin implementation.
622+ local t = require("lib.test_admin").test
623+ local core = require("apisix.core")
-- Request 1: expect status 200 and a body containing gpt-4, the model of
-- the priority 1 instance configured in TEST 13.
624+ local code, _, body = t("/ai",
625+ ngx.HTTP_POST,
626+ [[{
627+ "messages": [
628+ { "role": "system", "content": "You are a mathematician" },
629+ { "role": "user", "content": "What is 1+1?" }
630+ ]
631+ }]],
632+ nil,
633+ {
634+ ["test-type"] = "options",
635+ ["Content-Type"] = "application/json",
636+ }
637+ )
638+
639+ assert(code == 200, "first request should be successful")
640+ assert(core.string.find(body, "gpt-4"),
641+ "first request should be handled by higher priority instance")
642+
-- Request 2: identical payload; expect status 200 and a body containing
-- gpt-3, the model of the priority 0 instance.
643+ local code, _, body = t("/ai",
644+ ngx.HTTP_POST,
645+ [[{
646+ "messages": [
647+ { "role": "system", "content": "You are a mathematician" },
648+ { "role": "user", "content": "What is 1+1?" }
649+ ]
650+ }]],
651+ nil,
652+ {
653+ ["test-type"] = "options",
654+ ["Content-Type"] = "application/json",
655+ }
656+ )
657+
658+ assert(code == 200, "second request should be successful")
659+ assert(core.string.find(body, "gpt-3"),
660+ "second request should be handled by lower priority instance")
661+
-- Request 3: identical payload; expect status 503 and an error body
-- containing the text all servers tried.
-- NOTE(review): the body is read from the second return value of t() here,
-- while the two calls above read it from the third. Presumably the helper
-- returns the response body in second position for error statuses; confirm
-- against lib.test_admin.
662+ local code, body = t("/ai",
663+ ngx.HTTP_POST,
664+ [[{
665+ "messages": [
666+ { "role": "system", "content": "You are a mathematician" },
667+ { "role": "user", "content": "What is 1+1?" }
668+ ]
669+ }]],
670+ nil,
671+ {
672+ ["test-type"] = "options",
673+ ["Content-Type"] = "application/json",
674+ }
675+ )
676+
677+ assert(code == 503, "third request should be failed")
678+ assert(core.string.find(body, "all servers tried"), "all servers tried")
679+
-- Reached only when every assert above held; matches the expected
-- response_body section below.
680+ ngx.say("passed")
681+ }
682+ }
683+ --- response_body
684+ passed
0 commit comments