Skip to content

Commit 3487fb6

Browse files
feat: support header prefix in limit-count rules (apache#13004)
1 parent afe61d7 commit 3487fb6

File tree

8 files changed

+378
-7
lines changed

8 files changed

+378
-7
lines changed

apisix/plugins/ai-rate-limiting.lua

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,10 @@ local schema = {
9494
},
9595
},
9696
key = {type = "string"},
97+
header_prefix = {
98+
type = "string",
99+
description = "prefix for rate limit headers"
100+
},
97101
},
98102
required = {"count", "time_window", "key"},
99103
},

apisix/plugins/limit-count/init.lua

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,10 @@ local schema = {
101101
},
102102
},
103103
key = {type = "string"},
104+
header_prefix = {
105+
type = "string",
106+
description = "prefix for rate limit headers"
107+
},
104108
},
105109
required = {"count", "time_window", "key"},
106110
},
@@ -308,7 +312,7 @@ local function get_rules(ctx, conf)
308312
end
309313

310314
local rules = {}
311-
for _, rule in ipairs(conf.rules) do
315+
for index, rule in ipairs(conf.rules) do
312316
local count, err = resolve_var(ctx, rule.count)
313317
if err then
314318
goto CONTINUE
@@ -326,6 +330,7 @@ local function get_rules(ctx, conf)
326330
time_window = time_window,
327331
key_type = "constant",
328332
key = key,
333+
header_prefix = rule.header_prefix or index,
329334
})
330335

331336
::CONTINUE::
@@ -334,6 +339,28 @@ local function get_rules(ctx, conf)
334339
end
335340

336341

342+
343+
-- Build the names of the three rate-limit response headers for one rule.
--
-- When `rule.header_prefix` is set, each name is
--   <"X-" or "X-AI-"> .. header_prefix .. "-RateLimit-<Limit|Remaining|Reset>"
-- where "X-AI-" is used only if `name` is "ai-rate-limiting".
-- Otherwise each name is read from `conf`, falling back to `metadata`.
--
-- conf:     plugin configuration; limit_header / remaining_header /
--           reset_header are read from it
-- name:     plugin name, compared against "ai-rate-limiting"
-- rule:     rule table; only `header_prefix` is read here
-- metadata: table supplying the fallback header names
-- returns:  table with fields limit_header, remaining_header, reset_header
local function construct_rate_limiting_headers(conf, name, rule, metadata)
344+
local prefix = "X-"
345+
if name == "ai-rate-limiting" then
346+
prefix = "X-AI-"
347+
end
348+
349+
-- get_rules stores the rule's array index in header_prefix when none is
-- configured, so this value may be a number; `..` converts it to a string.
if rule.header_prefix then
350+
return {
351+
limit_header = prefix .. rule.header_prefix .. "-RateLimit-Limit",
352+
remaining_header = prefix .. rule.header_prefix .. "-RateLimit-Remaining",
353+
reset_header = prefix .. rule.header_prefix .. "-RateLimit-Reset",
354+
}
355+
end
356+
-- No per-rule prefix: use the configured header names, else the metadata ones.
return {
357+
limit_header = conf.limit_header or metadata.limit_header,
358+
remaining_header = conf.remaining_header or metadata.remaining_header,
359+
reset_header = conf.reset_header or metadata.reset_header,
360+
}
361+
end
362+
363+
337364
local function run_rate_limit(conf, rule, ctx, name, cost, dry_run)
338365
local lim, err = create_limit_obj(conf, rule, name)
339366

@@ -387,11 +414,7 @@ local function run_rate_limit(conf, rule, ctx, name, cost, dry_run)
387414
end
388415
core.log.info("limit-count plugin-metadata: ", core.json.delay_encode(metadata))
389416

390-
local set_limit_headers = {
391-
limit_header = conf.limit_header or metadata.limit_header,
392-
remaining_header = conf.remaining_header or metadata.remaining_header,
393-
reset_header = conf.reset_header or metadata.reset_header,
394-
}
417+
local set_limit_headers = construct_rate_limiting_headers(conf, name, rule, metadata)
395418
local phase = get_phase()
396419
local set_header = phase ~= "log"
397420

docs/en/latest/plugins/ai-rate-limiting.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ The `ai-rate-limiting` Plugin enforces token-based rate limiting for requests se
4747
| rules.count | integer or string | True | | >0 or variable expression | The maximum number of tokens allowed within a given time interval. Can be a static integer or a variable expression like `$http_custom_limit`. |
4848
| rules.time_window | integer or string | True | | >0 or variable expression | The time interval corresponding to the rate limiting `count` in seconds. Can be a static integer or a variable expression. |
4949
| rules.key | string | True | | | The key to count requests by. If the configured key does not exist, the rule will not be executed. The `key` is interpreted as a combination of variables, for example: `$http_custom_a $http_custom_b`. |
50+
| rules.header_prefix | string | False | | | Prefix for rate limit headers. If configured, the response will include `X-AI-{header_prefix}-RateLimit-Limit`, `X-AI-{header_prefix}-RateLimit-Remaining`, and `X-AI-{header_prefix}-RateLimit-Reset` headers. If not configured, the index of the rule in the rules array (starting from 1) is used as the prefix. For example, headers for the first rule will be `X-AI-1-RateLimit-Limit`, `X-AI-1-RateLimit-Remaining`, and `X-AI-1-RateLimit-Reset`. |
5051
| show_limit_quota_header | boolean | False | true | | If true, includes `X-AI-RateLimit-Limit-*`, `X-AI-RateLimit-Remaining-*`, and `X-AI-RateLimit-Reset-*` headers in the response, where `*` is the instance name. |
5152
| limit_strategy | string | False | total_tokens | [total_tokens, prompt_tokens, completion_tokens] | Type of token to apply rate limiting. `total_tokens` is the sum of `prompt_tokens` and `completion_tokens`. |
5253
| instances | array[object] | False | | | LLM instance rate limiting configurations. |

docs/en/latest/plugins/limit-count.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ You may see the following rate limiting headers in the response:
4949
| rules | array[object] | False | | | A list of rate limiting rules. Each rule is an object containing `count`, `time_window`, and `key`. |
5050
| rules.count | integer | True | | > 0 | The maximum number of requests allowed within a given time interval. |
5151
| rules.time_window | integer | True | | > 0 | The time interval corresponding to the rate limiting `count` in seconds. |
52-
| rules.key | string | True | | | The key to count requests by. If the configured key does not exist, the rule will not be executed. The `key` is interpreted as a combination of variables, for example: `$http_custom_a $http_custom_b`. |
52+
| rules.key | string | True | | | The key to count requests by. If the configured key does not exist, the rule will not be executed. The `key` is interpreted as a combination of variables, for example: `$http_custom_a $http_custom_b`. |
53+
| rules.header_prefix | string | False | | | Prefix for rate limit headers. If configured, the response will include `X-{header_prefix}-RateLimit-Limit`, `X-{header_prefix}-RateLimit-Remaining`, and `X-{header_prefix}-RateLimit-Reset` headers. If not configured, the index of the rule in the rules array is used as the prefix. For example, headers for the first rule will be `X-1-RateLimit-Limit`, `X-1-RateLimit-Remaining`, and `X-1-RateLimit-Reset`. |
5354
| key_type | string | False | var | ["var","var_combination","constant"] | The type of key. If the `key_type` is `var`, the `key` is interpreted as a variable. If the `key_type` is `var_combination`, the `key` is interpreted as a combination of variables. If the `key_type` is `constant`, the `key` is interpreted as a constant. |
5455
| key | string | False | remote_addr | | The key to count requests by. If the `key_type` is `var`, the `key` is interpreted as a variable. The variable does not need to be prefixed by a dollar sign (`$`). If the `key_type` is `var_combination`, the `key` is interpreted as a combination of variables. All variables should be prefixed by dollar signs (`$`). For example, to configure the `key` to use a combination of two request headers `custom-a` and `custom-b`, the `key` should be configured as `$http_custom_a $http_custom_b`. If the `key_type` is `constant`, the `key` is interpreted as a constant value. |
5556
| rejected_code | integer | False | 503 | [200,...,599] | The HTTP status code returned when a request is rejected for exceeding the threshold. |

docs/zh/latest/plugins/ai-rate-limiting.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ description: ai-rate-limiting 插件对发送到 LLM 服务的请求实施基于
4747
| rules.count | integer 或 string | 是 | | >0 或变量表达式 | 在给定时间间隔内允许的最大令牌数。可以是静态整数或变量表达式,如 `$http_custom_limit`。 |
4848
| rules.time_window | integer 或 string | 是 | | >0 或变量表达式 | 与速率限制 `count` 对应的时间间隔(秒)。可以是静态整数或变量表达式。 |
4949
| rules.key | string | 是 | | | 用于计数请求的键。如果配置的键不存在,则不会执行该规则。`key` 被解释为变量组合,例如:`$http_custom_a $http_custom_b`。 |
50+
| rules.header_prefix | string | 否 | | | 速率限制头部的前缀。如果配置了此项,响应将包含 `X-AI-{header_prefix}-RateLimit-Limit`、`X-AI-{header_prefix}-RateLimit-Remaining` 和 `X-AI-{header_prefix}-RateLimit-Reset` 头部。如果未配置,将使用规则索引 (从 1 开始) 作为前缀。|
5051
| show_limit_quota_header | boolean | 否 | true | | 如果为 true,则在响应中包含 `X-AI-RateLimit-Limit-*`、`X-AI-RateLimit-Remaining-*` 和 `X-AI-RateLimit-Reset-*` 头部,其中 `*` 是实例名称。 |
5152
| limit_strategy | string | 否 | total_tokens | [total_tokens, prompt_tokens, completion_tokens] | 应用速率限制的令牌类型。`total_tokens` 是 `prompt_tokens` 和 `completion_tokens` 的总和。 |
5253
| instances | array[object] | 否 | | | LLM 实例速率限制配置。 |

docs/zh/latest/plugins/limit-count.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ description: limit-count 插件使用固定窗口算法,通过给定时间间
5151
| rules.count | integer | 是 | | > 0 | 给定时间间隔内允许的最大请求数。 |
5252
| rules.time_window | integer | 是 | | > 0 | 速率限制 `count` 对应的时间间隔(以秒为单位)。 |
5353
| rules.key | string | 是 | | | 用于统计请求的键。如果配置的键不存在,则不会执行该规则。`key` 被解释为变量的组合,例如:`$http_custom_a $http_custom_b`。|
54+
| rules.header_prefix | string | 否 | | | 速率限制标头的前缀。如果已配置,响应将包含 `X-{header_prefix}-RateLimit-Limit`、`X-{header_prefix}-RateLimit-Remaining` 和 `X-{header_prefix}-RateLimit-Reset` 标头。如果未配置,则使用规则数组中规则的索引作为前缀。例如,第一个规则的标头将是 `X-1-RateLimit-Limit`、`X-1-RateLimit-Remaining` 和 `X-1-RateLimit-Reset`。|
5455
| key_type | string | 否 | var | ["var","var_combination","constant"] | key 的类型。如果 `key_type` 为 `var`,则 `key` 将被解释为变量。如果 `key_type` 为 `var_combination`,则 `key` 将被解释为变量的组合。如果 `key_type` 为 `constant`,则 `key` 将被解释为常量。 |
5556
| key | string | 否 | remote_addr | | 用于计数请求的 key。如果 `key_type` 为 `var`,则 `key` 将被解释为变量。变量不需要以美元符号(`$`)为前缀。如果 `key_type` 为 `var_combination`,则 `key` 会被解释为变量的组合。所有变量都应该以美元符号 (`$`) 为前缀。例如,要配置 `key` 使用两个请求头 `custom-a` 和 `custom-b` 的组合,则 `key` 应该配置为 `$http_custom_a $http_custom_b`。如果 `key_type` 为 `constant`,则 `key` 会被解释为常量值。|
5657
| rejected_code | integer | 否 | 503 | [200,...,599] | 请求因超出阈值而被拒绝时返回的 HTTP 状态代码。|

0 commit comments

Comments
 (0)