feat: support header prefix in limit-count rules (apache#13004)

shreemaan-abhishek · web-flow · commit 3487fb6c51cc · 2026-02-12T14:02:10.000+08:00
diff --git a/apisix/plugins/ai-rate-limiting.lua b/apisix/plugins/ai-rate-limiting.lua
@@ -94,6 +94,10 @@ local schema = {
                         },
                     },
                     key = {type = "string"},
+                    header_prefix = {
+                        type = "string",
+                        description = "prefix for rate limit headers"
+                    },
                 },
                 required = {"count", "time_window", "key"},
             },
diff --git a/apisix/plugins/limit-count/init.lua b/apisix/plugins/limit-count/init.lua
@@ -101,6 +101,10 @@ local schema = {
                         },
                     },
                     key = {type = "string"},
+                    header_prefix = {
+                        type = "string",
+                        description = "prefix for rate limit headers"
+                    },
                 },
                 required = {"count", "time_window", "key"},
             },
@@ -308,7 +312,7 @@ local function get_rules(ctx, conf)
     end
 
     local rules = {}
-    for _, rule in ipairs(conf.rules) do
+    for index, rule in ipairs(conf.rules) do
         local count, err = resolve_var(ctx, rule.count)
         if err then
             goto CONTINUE
@@ -326,6 +330,7 @@ local function get_rules(ctx, conf)
             time_window = time_window,
             key_type = "constant",
             key = key,
+            header_prefix = rule.header_prefix or index,
         })
 
         ::CONTINUE::
@@ -334,6 +339,28 @@ local function get_rules(ctx, conf)
 end
 
 
+
+-- Pick the response header names used to report quota for one rule.
+-- A rule with a header_prefix gets its own X-[AI-]<prefix>-RateLimit-* names;
+-- otherwise the names come from the plugin conf, then the plugin metadata.
+local function construct_rate_limiting_headers(conf, name, rule, metadata)
+    local rule_prefix = rule.header_prefix
+    if not rule_prefix then
+        return {
+            limit_header = conf.limit_header or metadata.limit_header,
+            remaining_header = conf.remaining_header or metadata.remaining_header,
+            reset_header = conf.reset_header or metadata.reset_header,
+        }
+    end
+    local base = (name == "ai-rate-limiting" and "X-AI-" or "X-") .. rule_prefix
+    return {
+        limit_header = base .. "-RateLimit-Limit",
+        remaining_header = base .. "-RateLimit-Remaining",
+        reset_header = base .. "-RateLimit-Reset",
+    }
+end
+
+
 local function run_rate_limit(conf, rule, ctx, name, cost, dry_run)
     local lim, err = create_limit_obj(conf, rule, name)
 
@@ -387,11 +414,7 @@ local function run_rate_limit(conf, rule, ctx, name, cost, dry_run)
     end
     core.log.info("limit-count plugin-metadata: ", core.json.delay_encode(metadata))
 
-    local set_limit_headers = {
-        limit_header = conf.limit_header or metadata.limit_header,
-        remaining_header = conf.remaining_header or metadata.remaining_header,
-        reset_header = conf.reset_header or metadata.reset_header,
-    }
+    local set_limit_headers = construct_rate_limiting_headers(conf, name, rule, metadata)
     local phase = get_phase()
     local set_header = phase ~= "log"
 
diff --git a/docs/en/latest/plugins/ai-rate-limiting.md b/docs/en/latest/plugins/ai-rate-limiting.md
@@ -47,6 +47,7 @@ The `ai-rate-limiting` Plugin enforces token-based rate limiting for requests se
 | rules.count                  | integer or string | True  |          | >0 or variable expression                              | The maximum number of tokens allowed within a given time interval. Can be a static integer or a variable expression like `$http_custom_limit`. |
 | rules.time_window            | integer or string | True  |          | >0 or variable expression                              | The time interval corresponding to the rate limiting `count` in seconds. Can be a static integer or a variable expression. |
 | rules.key                    | string         | True     |          |                                                         | The key to count requests by. If the configured key does not exist, the rule will not be executed. The `key` is interpreted as a combination of variables, for example: `$http_custom_a $http_custom_b`. |
+| rules.header_prefix          | string         | False    |          |                                                         | Prefix for rate limit headers. If configured, the response will include `X-AI-{header_prefix}-RateLimit-Limit`, `X-AI-{header_prefix}-RateLimit-Remaining`, and `X-AI-{header_prefix}-RateLimit-Reset` headers. If not configured, the index of the rule in the rules array (starting from 1) is used as the prefix. For example, headers for the first rule will be `X-AI-1-RateLimit-Limit`, `X-AI-1-RateLimit-Remaining`, and `X-AI-1-RateLimit-Reset`. |
 | show_limit_quota_header      | boolean        | False    | true     |                                                         | If true, includes `X-AI-RateLimit-Limit-*`, `X-AI-RateLimit-Remaining-*`, and `X-AI-RateLimit-Reset-*` headers in the response, where `*` is the instance name. |
 | limit_strategy               | string         | False    | total_tokens | [total_tokens, prompt_tokens, completion_tokens] | Type of token to apply rate limiting. `total_tokens` is the sum of `prompt_tokens` and `completion_tokens`. |
 | instances                    | array[object]  | False    |          |                                                         | LLM instance rate limiting configurations. |
diff --git a/docs/en/latest/plugins/limit-count.md b/docs/en/latest/plugins/limit-count.md
@@ -49,7 +49,8 @@ You may see the following rate limiting headers in the response:
 | rules                   | array[object] | False                               |               |                            | A list of rate limiting rules. Each rule is an object containing `count`, `time_window`, and `key`.                                                                                                                                                                                                                |
 | rules.count             | integer | True                                      |               | > 0                        | The maximum number of requests allowed within a given time interval.                                                                                                                                                                                                                                             |
 | rules.time_window       | integer | True                                      |               | > 0                        | The time interval corresponding to the rate limiting `count` in seconds.                                                                                                                                                                                                                                         |
-| rules.key               | string  | True                                      |               |                            | The key to count requests by. If the configured key does not exist, the rule will not be executed. The `key` is interpreted as a combination of variables, for example: `$http_custom_a $http_custom_b`.                                                                                                                                                                                                                                                                                   |
+| rules.key               | string  | True                                      |               |                            | The key to count requests by. If the configured key does not exist, the rule will not be executed. The `key` is interpreted as a combination of variables, for example: `$http_custom_a $http_custom_b`.                                                                                                                                                                                                                                                                   |
+| rules.header_prefix     | string  | False                                     |               |                            | Prefix for rate limit headers. If configured, the response will include `X-{header_prefix}-RateLimit-Limit`, `X-{header_prefix}-RateLimit-Remaining`, and `X-{header_prefix}-RateLimit-Reset` headers. If not configured, the index of the rule in the rules array is used as the prefix. For example, headers for the first rule will be `X-1-RateLimit-Limit`, `X-1-RateLimit-Remaining`, and `X-1-RateLimit-Reset`.                                                                                                                                                                                                                                                                  |
 | key_type                | string  | False                                     | var         | ["var","var_combination","constant"] | The type of key. If the `key_type` is `var`, the `key` is interpreted a variable. If the `key_type` is `var_combination`, the `key` is interpreted as a combination of variables. If the `key_type` is `constant`, the `key` is interpreted as a constant.                  |
 | key                     | string  | False                                     | remote_addr |                                        | The key to count requests by. If the `key_type` is `var`, the `key` is interpreted a variable. The variable does not need to be prefixed by a dollar sign (`$`). If the `key_type` is `var_combination`, the `key` is interpreted as a combination of variables. All variables should be prefixed by dollar signs (`$`). For example, to configure the `key` to use a combination of two request headers `custom-a` and `custom-b`, the `key` should be configured as `$http_custom_a $http_custom_b`. If the `key_type` is `constant`, the `key` is interpreted as a constant value. |
 | rejected_code           | integer | False                                     | 503           | [200,...,599]                          | The HTTP status code returned when a request is rejected for exceeding the threshold.                                                                                                                                                                                                                                                                                                                                                                                                                    |
diff --git a/docs/zh/latest/plugins/ai-rate-limiting.md b/docs/zh/latest/plugins/ai-rate-limiting.md
@@ -47,6 +47,7 @@ description: ai-rate-limiting 插件对发送到 LLM 服务的请求实施基于
 | rules.count                  | integer 或 string | 是  |          | >0 或变量表达式                              | 在给定时间间隔内允许的最大令牌数。可以是静态整数或变量表达式，如 `$http_custom_limit`。 |
 | rules.time_window            | integer 或 string | 是  |          | >0 或变量表达式                              | 与速率限制 `count` 对应的时间间隔（秒）。可以是静态整数或变量表达式。 |
 | rules.key                    | string         | 是     |          |                                                         | 用于计数请求的键。如果配置的键不存在，则不会执行该规则。`key` 被解释为变量组合，例如：`$http_custom_a $http_custom_b`。 |
+| rules.header_prefix          | string         | 否    |          |                                                         | 速率限制头部的前缀。如果配置了此项，响应将包含 `X-AI-{header_prefix}-RateLimit-Limit`、`X-AI-{header_prefix}-RateLimit-Remaining` 和 `X-AI-{header_prefix}-RateLimit-Reset` 头部。如果未配置，将使用规则索引 (从 1 开始) 作为前缀。|
 | show_limit_quota_header      | boolean        | 否    | true     |                                                         | 如果为 true，则在响应中包含 `X-AI-RateLimit-Limit-*`、`X-AI-RateLimit-Remaining-*` 和 `X-AI-RateLimit-Reset-*` 头部，其中 `*` 是实例名称。 |
 | limit_strategy               | string         | 否    | total_tokens | [total_tokens, prompt_tokens, completion_tokens] | 应用速率限制的令牌类型。`total_tokens` 是 `prompt_tokens` 和 `completion_tokens` 的总和。 |
 | instances                    | array[object]  | 否    |          |                                                         | LLM 实例速率限制配置。 |
diff --git a/docs/zh/latest/plugins/limit-count.md b/docs/zh/latest/plugins/limit-count.md
@@ -51,6 +51,7 @@ description: limit-count 插件使用固定窗口算法，通过给定时间间
 | rules.count | integer | 是 | | > 0 | 给定时间间隔内允许的最大请求数。 |
 | rules.time_window | integer | 是 | | > 0 | 速率限制 `count` 对应的时间间隔（以秒为单位）。 |
 | rules.key | string | 是 | | | 用于统计请求的键。如果配置的键不存在，则不会执行该规则。`key` 被解释为变量的组合，例如：`$http_custom_a $http_custom_b`。|
+| rules.header_prefix | string | 否 | | | 速率限制标头的前缀。如果已配置，响应将包含 `X-{header_prefix}-RateLimit-Limit`、`X-{header_prefix}-RateLimit-Remaining` 和 `X-{header_prefix}-RateLimit-Reset` 标头。如果未配置，则使用规则数组中规则的索引作为前缀。例如，第一个规则的标头将是 `X-1-RateLimit-Limit`、`X-1-RateLimit-Remaining` 和 `X-1-RateLimit-Reset`。|
 | key_type | string | 否 | var | ["var","var_combination","constant"] | key 的类型。如果`key_type` 为 `var`，则 `key` 将被解释为变量。如果 `key_type` 为 `var_combination`，则 `key` 将被解释为变量的组合。如果 `key_type` 为 `constant`，则 `key` 将被解释为常量。 |
 | key | string | 否 | remote_addr | | 用于计数请求的 key。如果 `key_type` 为 `var`，则 `key` 将被解释为变量。变量不需要以美元符号（`$`）为前缀。如果 `key_type` 为 `var_combination`，则 `key` 会被解释为变量的组合。所有变量都应该以美元符号 (`$`) 为前缀。例如，要配置 `key` 使用两个请求头 `custom-a` 和 `custom-b` 的组合，则 `key` 应该配置为 `$http_custom_a $http_custom_b`。如果 `key_type` 为 `constant`，则 `key` 会被解释为常量值。|
 | rejection_code | integer | 否 | 503 | [200,...,599] | 请求因超出阈值而被拒绝时返回的 HTTP 状态代码。|
diff --git a/t/plugin/ai-rate-limiting.t b/t/plugin/ai-rate-limiting.t
diff --git a/t/plugin/limit-count-rules.t b/t/plugin/limit-count-rules.t