Skip to content

Commit d44cd1d

Browse files
test(rate-limiter): baseline comparison and boundary burst empirical proof
- Add --baseline flag to compare_performance.py that measures no-rate-limit plugin overhead, then reports per-implementation rate-limiter cost delta
- Convert xfail test_fixed_window_burst_at_boundary into passing test that empirically proves fixed_window allows 2x limit at window boundaries
- Add companion test_sliding_window_prevents_boundary_burst proving sliding_window blocks the same burst scenario

Resolves two previously deferred limitations:
- "Baseline / no-plugin overhead comparison"
- "Fixed-window boundary burst — documented but not empirically demonstrated"

Signed-off-by: Pratik Gandhi <gandhipratik203@gmail.com>
1 parent 358a1c7 commit d44cd1d

2 files changed

Lines changed: 91 additions & 28 deletions

File tree

plugins_rust/rate_limiter/compare_performance.py

Lines changed: 34 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -114,14 +114,18 @@ def _make_plugin_config(
114114
config: dict[str, Any] = {
115115
"algorithm": algorithm,
116116
"backend": backend,
117-
"by_user": user_rate,
118117
"redis_url": redis_url,
119118
"redis_key_prefix": redis_key_prefix,
120119
"redis_fallback": False,
121120
}
122-
if dimensions >= 3:
123-
config["by_tenant"] = "6000000/m" if workload != "mixed" else "6/m"
124-
config["by_tool"] = {"benchmark_tool": "3000000/m" if workload != "mixed" else "5/m"}
121+
if dimensions == 0:
122+
# Baseline: no rate limits configured — plugin short-circuits immediately.
123+
pass
124+
else:
125+
config["by_user"] = user_rate
126+
if dimensions >= 3:
127+
config["by_tenant"] = "6000000/m" if workload != "mixed" else "6/m"
128+
config["by_tool"] = {"benchmark_tool": "3000000/m" if workload != "mixed" else "5/m"}
125129
return PluginConfig(
126130
name=f"rate-limiter-bench-{algorithm}-{backend}-d{dimensions}-{workload}",
127131
kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin",
@@ -438,6 +442,20 @@ async def _benchmark_throughput(
438442

439443
async def _run_latency(args: argparse.Namespace, redis_enabled: bool) -> None:
440444
"""Run latency-mode benchmarks."""
445+
# --- Baseline: no rate limits configured ---
446+
if args.baseline:
447+
hook = args.hooks[0]
448+
baseline_scenario = Scenario(algorithm="fixed_window", backend="memory", hook=hook, dimensions=0, workload="allow")
449+
print("=" * 88)
450+
print(f"BASELINE (no rate limits) / {hook}")
451+
print("=" * 88)
452+
baseline_result = await _benchmark_scenario(baseline_scenario, "Python", args.iterations, args.warmup, args.redis_url)
453+
print(f" Baseline: mean {baseline_result.mean_ms:.4f} ms | median {baseline_result.median_ms:.4f} ms | p95 {baseline_result.p95_ms:.4f} ms")
454+
print()
455+
else:
456+
baseline_result = None
457+
458+
# --- Per-scenario benchmarks ---
441459
scenarios = [
442460
Scenario(algorithm=algorithm, backend=backend, hook=hook, dimensions=args.dimensions, workload=args.workload)
443461
for algorithm in ("fixed_window", "sliding_window", "token_bucket")
@@ -459,9 +477,13 @@ async def _run_latency(args: argparse.Namespace, redis_enabled: bool) -> None:
459477
python_result = await _benchmark_scenario(scenario, "Python", args.iterations, args.warmup, args.redis_url)
460478
rust_result = await _benchmark_scenario(scenario, "Rust", args.iterations, args.warmup, args.redis_url)
461479
speedup = python_result.mean_ms / rust_result.mean_ms if rust_result.mean_ms else 0.0
462-
print(f" Python: mean {python_result.mean_ms:.3f} ms | median {python_result.median_ms:.3f} ms | p95 {python_result.p95_ms:.3f} ms")
463-
print(f" Rust: mean {rust_result.mean_ms:.3f} ms | median {rust_result.median_ms:.3f} ms | p95 {rust_result.p95_ms:.3f} ms")
480+
print(f" Python: mean {python_result.mean_ms:.3f} ms | median {python_result.median_ms:.3f} ms | p95 {python_result.p95_ms:.3f} ms")
481+
print(f" Rust: mean {rust_result.mean_ms:.3f} ms | median {rust_result.median_ms:.3f} ms | p95 {rust_result.p95_ms:.3f} ms")
464482
print(f" Speedup: {speedup:.2f}x faster")
483+
if baseline_result and baseline_result.mean_ms > 0:
484+
py_overhead = python_result.mean_ms - baseline_result.mean_ms
485+
rs_overhead = rust_result.mean_ms - baseline_result.mean_ms
486+
print(f" Rate-limiter overhead: Python +{py_overhead:.3f} ms | Rust +{rs_overhead:.3f} ms")
465487
print()
466488

467489

@@ -593,6 +615,12 @@ def _parse_args() -> argparse.Namespace:
593615
default=None,
594616
help="Thread count for throughput mode (default: sweep 1,2,4,8)",
595617
)
618+
parser.add_argument(
619+
"--baseline",
620+
action="store_true",
621+
default=False,
622+
help="Include a baseline run (no rate limits) to measure plugin overhead",
623+
)
596624
return parser.parse_args()
597625

598626

tests/unit/mcpgateway/plugins/plugins/rate_limiter/test_rate_limiter.py

Lines changed: 57 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -773,30 +773,22 @@ async def test_concurrent_requests_respect_limit():
773773
)
774774

775775

776-
@pytest.mark.xfail(
777-
strict=True,
778-
reason=(
779-
"Gap: fixed window allows 2× the limit at a window boundary. "
780-
"N requests at end of W1 + N requests at start of W2 all succeed."
781-
),
782-
)
783776
@pytest.mark.asyncio
784-
async def test_fixed_window_burst_at_boundary():
785-
"""
786-
A user can burst at a window boundary: N requests at the end of window W1
787-
and N requests at the start of W2 both succeed, giving 2× the limit in practice.
777+
async def test_fixed_window_allows_boundary_burst():
778+
"""Empirical proof: fixed_window allows 2× the limit at a window boundary.
779+
780+
A user sends N requests at the end of window W1 and N more at the start of
781+
W2. All 2N succeed because the counter resets at the boundary.
788782
789783
Example with limit=5/s:
790784
t=1000: requests 1-5 → allowed (window W1, count=5)
791785
t=1001: requests 6-10 → allowed (window W2 resets, count=1..5)
792786
Total = 10 requests in ~1 second against a limit of 5/s.
793787
794-
Fix: use a sliding window or token bucket algorithm.
788+
This is the expected behavior of the fixed_window algorithm — not a bug,
789+
but a documented trade-off. Use sliding_window or token_bucket to prevent
790+
boundary bursts (see companion test below).
795791
"""
796-
# Force the Python path: this test documents a known Python backend limitation
797-
# (fixed window allows 2× the limit at a window boundary via time mocking).
798-
# The Rust engine uses monotonic time for window tracking independently of
799-
# the Python time mock, so the burst scenario does not reproduce there.
800792
import plugins.rate_limiter.rate_limiter as _rl_mod
801793

802794
with patch.object(_rl_mod, "_RUST_AVAILABLE", False):
@@ -821,12 +813,55 @@ async def test_fixed_window_burst_at_boundary():
821813
if r.violation is None:
822814
allowed_total += 1
823815

824-
# Expected: a sliding window would cap total at ~5-6 across the boundary
825-
# Actual: fixed window allows all 10 (5 in W1 + 5 in W2)
826-
assert allowed_total <= 5, (
827-
f"Fixed window burst: {allowed_total} requests allowed across the window "
828-
f"boundary. Configured limit is 5/s. "
829-
f"Fix: replace fixed window with a sliding window or token bucket."
816+
# fixed_window: all 10 allowed (5 in W1 + 5 in W2 = 2× limit in ~1 second)
817+
assert allowed_total == 10, (
818+
f"Expected fixed_window to allow 2× the limit at boundary, got {allowed_total}/10"
819+
)
820+
821+
822+
@pytest.mark.asyncio
823+
async def test_sliding_window_prevents_boundary_burst():
824+
"""Companion proof: sliding_window prevents the boundary burst that fixed_window allows.
825+
826+
Same scenario as test_fixed_window_allows_boundary_burst but with
827+
sliding_window. The 5 requests from W1 are still within the sliding window
828+
when W2 starts, so the second batch is blocked.
829+
"""
830+
import plugins.rate_limiter.rate_limiter as _rl_mod
831+
832+
with patch.object(_rl_mod, "_RUST_AVAILABLE", False):
833+
plugin = RateLimiterPlugin(
834+
PluginConfig(
835+
name="rl-sw",
836+
kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin",
837+
hooks=[ToolHookType.TOOL_PRE_INVOKE],
838+
config={"by_user": "5/s", "algorithm": ALGORITHM_SLIDING_WINDOW},
839+
)
840+
)
841+
ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice"))
842+
payload = ToolPreInvokePayload(name="test_tool", arguments={})
843+
844+
allowed_total = 0
845+
846+
with patch("plugins.rate_limiter.rate_limiter.time") as mock_time:
847+
# Window W1: fill the limit exactly at t=1000
848+
mock_time.time.return_value = 1000.0
849+
for _ in range(5):
850+
r = await plugin.tool_pre_invoke(payload, ctx)
851+
if r.violation is None:
852+
allowed_total += 1
853+
854+
# Half a second later: W1 timestamps are still within the 1s sliding window
855+
mock_time.time.return_value = 1000.5
856+
for _ in range(5):
857+
r = await plugin.tool_pre_invoke(payload, ctx)
858+
if r.violation is None:
859+
allowed_total += 1
860+
861+
# sliding_window: only 5 allowed — the W1 timestamps at t=1000 are still
862+
# within the window at t=1000.5, so the second batch is blocked.
863+
assert allowed_total == 5, (
864+
f"Expected sliding_window to prevent boundary burst, got {allowed_total}/10 allowed"
830865
)
831866

832867

0 commit comments

Comments (0)