Skip to content

Commit 6b517ff

Browse files
fix(rate-limiter): address review findings across engine, tests, and benchmarks
Correctness: - Fix sliding window member collision in multi-process Redis by including PID in sorted-set member string (redis_backend.rs) - Reject zero-count rate strings in Python _parse_rate() to match Rust (rate_limiter.py) - Fix allow_many() returning fewer results than input checks (rate_limiter.py) - Guard sub-second window_nanos producing reset_timestamp == now (memory.rs) - Fix EvalDimension docstring incorrectly describing EvalResult (types.rs) - Deduplicate from_dims empty case via unlimited(0) (types.rs) Performance: - Add EVALSHA caching with NOSCRIPT fallback to Rust Redis backend, matching the Python _ensure_scripts_loaded/_evalsha pattern (redis_backend.rs) - Cache parsed rate strings in Python MemoryBackend to avoid per-request _parse_rate() overhead (rate_limiter.py) - Fix Criterion benchmarks to advance clock between iterations so they measure steady-state under-limit path, not blocked path (benches/rate_limiter.rs) Tests: - Add TokenBucketAlgorithm.sweep() eviction tests - Add _extract_user_identity dict fallback chain tests (email/id/sub) - Add prompt_pre_fetch Rust async Redis path coverage - Remove unused jwt_token_alice/jwt_token_bob fixtures from integration tests - Fix time.sleep() -> asyncio.sleep() in async integration tests Load tests and Makefile: - Fix response misclassification: HTTP 429 now classified as [rate-limited] instead of [infra-error] in backend correctness and scale load tests - Use HTTP 429 as primary signal in capacity test, keep 422 string match as secondary fallback - Add missing RL_REQS_PER_SECOND and RL_PROMPT_ID Makefile defaults - Fix misleading "500 users" comment to reflect actual default of 100 Signed-off-by: Pratik Gandhi <gandhipratik203@gmail.com>
1 parent 28a4eb8 commit 6b517ff

11 files changed

Lines changed: 304 additions & 111 deletions

File tree

Makefile

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2326,6 +2326,8 @@ MCP_RATE_LIMITER_REDIS_CAPACITY_LOCUSTFILE ?= tests/loadtest/locustfile_rate_lim
23262326
RL_ALGORITHM ?= fixed_window
23272327
RL_USERS ?= 100
23282328
RL_SPAWN_RATE ?= 10
2329+
RL_REQS_PER_SECOND ?= 0.25
2330+
RL_PROMPT_ID ?=
23292331
MCP_PROTOCOL_HOST ?= http://localhost:4444
23302332
MCP_BENCHMARK_HOST ?= http://localhost:8080
23312333
MCP_BENCHMARK_SERVER_ID ?= 9779b6698cbd4b4995ee04a4fab38737
@@ -2446,7 +2448,7 @@ benchmark-rate-limiter: ## Rate limiter correctness test (1
24462448
# help: benchmark-rate-limiter-scale - Multi-user scale test showing Redis memory divergence across algorithms
24472449
.PHONY: benchmark-rate-limiter-scale
24482450
RL_RUN_TIME ?= 300s
2449-
benchmark-rate-limiter-scale: ## Scale test: 500 unique users, Redis memory timeline per algorithm
2451+
benchmark-rate-limiter-scale: ## Scale test: RL_USERS unique users (default 100), Redis memory timeline per algorithm
24502452
@echo "📈 Running rate limiter scale test (resource divergence)..."
24512453
@echo " Algorithm: $(RL_ALGORITHM) (must match plugins/config.yaml)"
24522454
@echo " Users: $(RL_USERS) unique identities (each creates own Redis key)"

plugins/rate_limiter/rate_limiter.py

Lines changed: 27 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -118,6 +118,8 @@ def _parse_rate(rate: str) -> tuple[int, int]:
118118
count = int(count_str)
119119
except (ValueError, AttributeError):
120120
raise ValueError(f"Invalid rate string {rate!r}: expected '<count>/<unit>' e.g. '60/m'")
121+
if count <= 0:
122+
raise ValueError(f"Invalid rate string {rate!r}: count must be > 0, got {count}")
121123
per = per.strip().lower()
122124
if per in ("s", "sec", "second"):
123125
return count, 1
@@ -449,6 +451,7 @@ def __init__(self, algorithm: FixedWindowAlgorithm | SlidingWindowAlgorithm | To
449451
self._lock = asyncio.Lock()
450452
self._sweep_interval = sweep_interval
451453
self._sweep_task: Optional[asyncio.Task] = None # type: ignore[type-arg]
454+
self._parsed_cache: Dict[str, tuple[int, int]] = {} # rate_str → (count, window)
452455

453456
def _ensure_sweep_task(self) -> None:
454457
"""Start the background sweep task if it is not already running."""
@@ -470,7 +473,11 @@ async def allow(self, key: str, limit: Optional[str]) -> tuple[bool, int, int, d
470473
self._ensure_sweep_task()
471474
if not limit:
472475
return True, 0, 0, {"limited": False}
473-
count, window = _parse_rate(limit)
476+
parsed = self._parsed_cache.get(limit)
477+
if parsed is None:
478+
parsed = _parse_rate(limit)
479+
self._parsed_cache[limit] = parsed
480+
count, window = parsed
474481
return await self._algorithm.allow(self._lock, key, count, window)
475482

476483

@@ -826,27 +833,37 @@ async def allow_many(self, checks: List[Tuple[str, str]]) -> List[tuple[bool, in
826833
Returns:
827834
One (allowed, limit, reset_timestamp, metadata) tuple per input check.
828835
"""
829-
active = [(key, limit) for key, limit in checks if limit]
830-
if not active:
831-
return [(True, 0, 0, {"limited": False})] * len(checks)
836+
no_limit: tuple[bool, int, int, dict[str, Any]] = (True, 0, 0, {"limited": False})
837+
active_indices = [i for i, (_, limit) in enumerate(checks) if limit]
838+
if not active_indices:
839+
return [no_limit] * len(checks)
832840

841+
active = [checks[i] for i in active_indices]
833842
parsed: List[Tuple[str, int, int]] = [(key, *_parse_rate(limit)) for key, limit in active] # type: ignore[misc]
834843
redis_keys = [f"{self._prefix}:{key}:{window}" for key, _count, window in parsed]
835844

836845
try:
837846
client = await self._get_client()
838847
await self._ensure_scripts_loaded(client)
839848
if self._algorithm_name == ALGORITHM_SLIDING_WINDOW:
840-
return await self._allow_many_sliding(client, parsed, redis_keys)
841-
if self._algorithm_name == ALGORITHM_TOKEN_BUCKET:
842-
return await self._allow_many_token_bucket(client, parsed, redis_keys)
843-
return await self._allow_many_fixed(client, parsed, redis_keys)
849+
active_results = await self._allow_many_sliding(client, parsed, redis_keys)
850+
elif self._algorithm_name == ALGORITHM_TOKEN_BUCKET:
851+
active_results = await self._allow_many_token_bucket(client, parsed, redis_keys)
852+
else:
853+
active_results = await self._allow_many_fixed(client, parsed, redis_keys)
844854

845855
except Exception:
846856
logger.exception("RedisBackend.allow_many failed; %s", "falling back to memory" if self._fallback else "allowing request")
847857
if self._fallback is not None:
848-
return [await self._fallback.allow(key, limit) for key, limit in active]
849-
return [(True, 0, 0, {"limited": False})] * len(active)
858+
active_results = [await self._fallback.allow(key, limit) for key, limit in active]
859+
else:
860+
active_results = [no_limit] * len(active)
861+
862+
# Map active results back to the full input list.
863+
results: List[tuple[bool, int, int, dict[str, Any]]] = [no_limit] * len(checks)
864+
for idx, result in zip(active_indices, active_results):
865+
results[idx] = result
866+
return results
850867

851868
async def _allow_many_fixed(
852869
self, client: Any, parsed: List[Tuple[str, int, int]], redis_keys: List[str]

plugins_rust/rate_limiter/benches/rate_limiter.rs

Lines changed: 31 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -3,85 +3,96 @@
33
//
44
// Criterion benchmarks for the rate limiter memory backend.
55
// PERF-01, MEM-02, MEM-03, MEM-04.
6+
//
7+
// Each iteration measures a single "under-limit" request — the realistic
8+
// hot path in production. The clock advances between iterations so the
9+
// window resets and counters stay low, preventing benchmark drift into the
10+
// "blocked" code path or unbounded VecDeque growth (sliding window).
611

712
use std::hint::black_box;
813
use std::sync::Arc;
914

1015
use criterion::{Criterion, criterion_group, criterion_main};
1116
use rate_limiter_rust::{
12-
clock::FakeClock,
17+
clock::{FakeClock, FakeClockHandle},
1318
config::{Algorithm, EngineConfig, parse_rate},
1419
engine::RateLimiterEngine,
1520
};
1621

1722
const T0_UNIX: i64 = 1_000_000;
18-
const WINDOW: u64 = 1_000_000_000; // 1s
23+
const LIMIT: u64 = 100;
24+
const WINDOW: u64 = 60_000_000_000; // 60s in nanos
1925

20-
fn make_engine(algorithm: Algorithm) -> RateLimiterEngine {
21-
let (clock, _handle) = FakeClock::new(T0_UNIX);
26+
fn make_engine(algorithm: Algorithm) -> (RateLimiterEngine, FakeClockHandle) {
27+
let (clock, handle) = FakeClock::new(T0_UNIX);
2228
let cfg = EngineConfig {
23-
by_user: Some(parse_rate("1000000/s").unwrap()),
29+
by_user: Some(parse_rate(&format!("{}/m", LIMIT)).unwrap()),
2430
by_tenant: None,
2531
by_tool: Default::default(),
2632
algorithm,
2733
};
28-
RateLimiterEngine::new_with_clock(cfg, Arc::new(clock))
34+
(RateLimiterEngine::new_with_clock(cfg, Arc::new(clock)), handle)
2935
}
3036

3137
fn bench_fixed_window(c: &mut Criterion) {
32-
let engine = make_engine(Algorithm::FixedWindow);
38+
let (engine, handle) = make_engine(Algorithm::FixedWindow);
3339
c.bench_function("fixed_window/single_key", |b| {
3440
b.iter(|| {
41+
// Advance past the window so each iteration is a fresh "allowed" request.
42+
handle.advance_secs(61);
3543
engine
3644
.evaluate_many(
37-
black_box(vec![("user:bench".to_string(), 1_000_000, WINDOW)]),
38-
T0_UNIX,
45+
black_box(vec![("user:bench".to_string(), LIMIT, WINDOW)]),
46+
handle.unix_secs(),
3947
)
4048
.unwrap()
4149
})
4250
});
4351
}
4452

4553
fn bench_token_bucket(c: &mut Criterion) {
46-
let engine = make_engine(Algorithm::TokenBucket);
54+
let (engine, handle) = make_engine(Algorithm::TokenBucket);
4755
c.bench_function("token_bucket/single_key", |b| {
4856
b.iter(|| {
57+
handle.advance_secs(61);
4958
engine
5059
.evaluate_many(
51-
black_box(vec![("user:bench".to_string(), 1_000_000, WINDOW)]),
52-
T0_UNIX,
60+
black_box(vec![("user:bench".to_string(), LIMIT, WINDOW)]),
61+
handle.unix_secs(),
5362
)
5463
.unwrap()
5564
})
5665
});
5766
}
5867

5968
fn bench_sliding_window(c: &mut Criterion) {
60-
let engine = make_engine(Algorithm::SlidingWindow);
69+
let (engine, handle) = make_engine(Algorithm::SlidingWindow);
6170
c.bench_function("sliding_window/single_key", |b| {
6271
b.iter(|| {
72+
handle.advance_secs(61);
6373
engine
6474
.evaluate_many(
65-
black_box(vec![("user:bench".to_string(), 1_000_000, WINDOW)]),
66-
T0_UNIX,
75+
black_box(vec![("user:bench".to_string(), LIMIT, WINDOW)]),
76+
handle.unix_secs(),
6777
)
6878
.unwrap()
6979
})
7080
});
7181
}
7282

7383
fn bench_multi_dim(c: &mut Criterion) {
74-
let engine = make_engine(Algorithm::FixedWindow);
84+
let (engine, handle) = make_engine(Algorithm::FixedWindow);
7585
c.bench_function("fixed_window/three_dims", |b| {
7686
b.iter(|| {
87+
handle.advance_secs(61);
7788
engine
7889
.evaluate_many(
7990
black_box(vec![
80-
("user:alice".to_string(), 1_000_000, WINDOW),
81-
("tenant:acme".to_string(), 10_000_000, WINDOW),
82-
("tool:search".to_string(), 100_000, WINDOW),
91+
("user:alice".to_string(), LIMIT, WINDOW),
92+
("tenant:acme".to_string(), LIMIT * 100, WINDOW),
93+
("tool:search".to_string(), LIMIT / 10, WINDOW),
8394
]),
84-
T0_UNIX,
95+
handle.unix_secs(),
8596
)
8697
.unwrap()
8798
})

plugins_rust/rate_limiter/src/memory.rs

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -206,7 +206,10 @@ fn fixed_window(
206206
*window_start_unix = now_unix;
207207
}
208208

209-
let window_secs = (window_nanos / 1_000_000_000) as i64;
209+
// At least 1 second so reset_timestamp is always in the future, even if
210+
// window_nanos < 1 billion (sub-second window — currently unreachable via
211+
// config parsing but guarded defensively).
212+
let window_secs = (window_nanos / 1_000_000_000).max(1) as i64;
210213
// Constant within a window — matches Python backend behaviour (CORR-02).
211214
let reset_timestamp = *window_start_unix + window_secs;
212215

0 commit comments

Comments (0)