Skip to content

Commit 3efe490

Browse files
committed
chore: added honeybeepf-llm for the benchmark test
1 parent 9c1a209 commit 3efe490

10 files changed

Lines changed: 278 additions & 35 deletions

File tree

.github/workflows/benchmark.yml

Lines changed: 44 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,37 @@ jobs:
3333
source .venv/bin/activate
3434
uv pip install -r requirements.txt
3535
36+
- name: Install Rust toolchain (stable + nightly + rust-src)
37+
run: |
38+
rustup install stable
39+
rustup install nightly
40+
rustup component add rust-src --toolchain nightly
41+
working-directory: .
42+
43+
- name: Cache cargo
44+
uses: Swatinem/rust-cache@v2
45+
with:
46+
workspaces: honeybeepf-llm
47+
48+
- name: Install bpf-linker
49+
run: cargo install bpf-linker
50+
working-directory: .
51+
52+
- name: Build honeybeepf-llm agent
53+
run: cargo +nightly build --release -p honeybeepf-llm
54+
working-directory: honeybeepf-llm
55+
56+
- name: setcap eBPF capabilities
57+
run: |
58+
sudo setcap cap_bpf,cap_sys_admin,cap_perfmon,cap_net_admin=eip honeybeepf-llm/target/release/honeybeepf-llm
59+
getcap honeybeepf-llm/target/release/honeybeepf-llm
60+
working-directory: .
61+
62+
- name: Create otel-output directory
63+
run: |
64+
mkdir -p otel-output
65+
chmod 777 otel-output
66+
3667
- name: Start services
3768
run: docker compose up -d --wait
3869

@@ -42,11 +73,23 @@ jobs:
4273
python run_benchmark.py \
4374
--scenario ${{ inputs.scenario }} \
4475
--output results.json \
45-
--markdown summary.md
76+
--markdown summary.md \
77+
--ebpf-binary ../../honeybeepf-llm/target/release/honeybeepf-llm
4678
4779
- name: Post results to Job Summary
4880
run: cat summary.md >> "$GITHUB_STEP_SUMMARY"
4981

82+
- name: Upload artifacts
83+
if: always()
84+
uses: actions/upload-artifact@v4
85+
with:
86+
name: benchmark-output
87+
path: |
88+
tests/benchmark/results.json
89+
tests/benchmark/summary.md
90+
tests/benchmark/otel-output/otel.jsonl
91+
retention-days: 14
92+
5093
- name: Teardown
5194
if: always()
5295
run: docker compose down

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -67,5 +67,8 @@ __pycache__/
6767
*.pyc
6868
.venv/
6969

70+
# Benchmark OTel collector output (runtime artifacts)
71+
tests/benchmark/otel-output/
72+
7073
# Superpowers docs
7174
docs/superpowers/

tests/benchmark/Dockerfile.mock

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,17 @@
11
FROM python:3.12-slim
22
WORKDIR /app
3+
RUN apt-get update && \
4+
apt-get install -y --no-install-recommends openssl && \
5+
rm -rf /var/lib/apt/lists/*
36
RUN pip install --no-cache-dir fastapi uvicorn
7+
RUN openssl req -x509 -newkey rsa:2048 \
8+
-keyout /app/key.pem -out /app/cert.pem \
9+
-days 3650 -nodes \
10+
-subj '/CN=mock-llm' \
11+
-addext "subjectAltName=DNS:mock-llm,DNS:localhost,IP:127.0.0.1"
412
COPY mock_server.py .
5-
EXPOSE 8080
6-
CMD ["uvicorn", "mock_server:app", "--host", "0.0.0.0", "--port", "8080", "--log-level", "warning"]
13+
EXPOSE 8443
14+
CMD ["uvicorn", "mock_server:app", \
15+
"--host", "0.0.0.0", "--port", "8443", \
16+
"--ssl-keyfile=/app/key.pem", "--ssl-certfile=/app/cert.pem", \
17+
"--log-level", "warning"]

tests/benchmark/config.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22

33
from load_generator import LoadProfile
44

5-
DIRECT_URL = "http://localhost:8080"
5+
DIRECT_URL = "https://localhost:8443"
66
PROXY_URL = "http://localhost:4000"
77
CONTAINER_NAMES = ["benchmark-mock-llm-1", "benchmark-litellm-1"]
88

tests/benchmark/docker-compose.yml

Lines changed: 17 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,12 +4,13 @@ services:
44
context: .
55
dockerfile: Dockerfile.mock
66
ports:
7-
- "8080:8080"
7+
- "8443:8443"
88
environment:
99
- MOCK_RESPONSE_DELAY_MS=50
1010
- MOCK_COMPLETION_TOKENS=30
1111
healthcheck:
12-
test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8080/health')"]
12+
test: ["CMD", "python", "-c",
13+
"import urllib.request, ssl; ctx=ssl._create_unverified_context(); urllib.request.urlopen('https://localhost:8443/health', context=ctx)"]
1314
interval: 2s
1415
timeout: 2s
1516
retries: 5
@@ -24,11 +25,25 @@ services:
2425
depends_on:
2526
mock-llm:
2627
condition: service_healthy
28+
otel-collector:
29+
condition: service_started
2730
environment:
2831
- LITELLM_LOG=ERROR
32+
- OTEL_EXPORTER=otlp_grpc
33+
- OTEL_ENDPOINT=http://otel-collector:4317
34+
- OTEL_SERVICE_NAME=litellm
2935
healthcheck:
3036
test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness')"]
3137
interval: 2s
3238
timeout: 3s
3339
retries: 5
3440
start_period: 30s
41+
42+
otel-collector:
43+
image: otel/opentelemetry-collector-contrib:0.110.0
44+
command: ["--config=/etc/otel-collector.yaml"]
45+
volumes:
46+
- ./otel-collector.yaml:/etc/otel-collector.yaml
47+
- ./otel-output:/output
48+
ports:
49+
- "4317:4317"

tests/benchmark/litellm_config.yaml

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2,17 +2,21 @@ model_list:
22
- model_name: gpt-4o-mini
33
litellm_params:
44
model: openai/gpt-4o-mini
5-
api_base: http://mock-llm:8080/v1
5+
api_base: https://mock-llm:8443/v1
66
api_key: fake-key-for-benchmark
77

88
- model_name: claude-sonnet-4-20250514
99
litellm_params:
1010
model: openai/claude-sonnet-4-20250514
11-
api_base: http://mock-llm:8080/v1
11+
api_base: https://mock-llm:8443/v1
1212
api_key: fake-key-for-benchmark
1313

1414
- model_name: gemini-2.0-flash
1515
litellm_params:
1616
model: openai/gemini-2.0-flash
17-
api_base: http://mock-llm:8080/v1
17+
api_base: https://mock-llm:8443/v1
1818
api_key: fake-key-for-benchmark
19+
20+
litellm_settings:
21+
callbacks: ["otel"]
22+
ssl_verify: false

tests/benchmark/load_generator.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,7 @@ async def worker(client: httpx.AsyncClient):
6565

6666
t_start = time.monotonic()
6767

68-
async with httpx.AsyncClient(timeout=self._timeout) as client:
68+
async with httpx.AsyncClient(timeout=self._timeout, verify=False) as client:
6969
if profile.rate_rps > 0 and profile.duration_secs > 0:
7070
interval = 1.0 / profile.rate_rps
7171
tasks = []

tests/benchmark/otel-collector.yaml

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,26 @@
1+
receivers:
2+
otlp:
3+
protocols:
4+
grpc:
5+
endpoint: 0.0.0.0:4317
6+
7+
exporters:
8+
debug:
9+
verbosity: detailed
10+
file:
11+
path: /output/otel.jsonl
12+
13+
service:
14+
telemetry:
15+
logs:
16+
level: info
17+
pipelines:
18+
traces:
19+
receivers: [otlp]
20+
exporters: [debug, file]
21+
metrics:
22+
receivers: [otlp]
23+
exporters: [debug, file]
24+
logs:
25+
receivers: [otlp]
26+
exporters: [debug, file]

tests/benchmark/report.py

Lines changed: 46 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -16,18 +16,35 @@ def _overhead(key: str, b: float, p: float) -> str:
1616
return "-"
1717

1818

19-
def format_report(baseline: dict, proxy: dict) -> str:
20-
header = f"{'Metric':<25} {'Baseline':>12} {'LiteLLM Proxy':>14} {'Overhead':>12}"
19+
def format_report(baseline: dict, proxy: dict, ebpf: dict | None = None) -> str:
20+
if ebpf is None:
21+
header = f"{'Metric':<25} {'Baseline':>12} {'LiteLLM Proxy':>14} {'Overhead':>12}"
22+
sep = "-" * len(header)
23+
lines = [header, sep]
24+
for row in ROWS:
25+
label, key, fmt = row[0], row[1], row[2]
26+
mult = row[3] if len(row) > 3 else 1
27+
b = baseline.get(key, 0) * mult
28+
p = proxy.get(key, 0) * mult
29+
lines.append(f"{label:<25} {format(b, fmt):>12} {format(p, fmt):>14} {_overhead(key, b, p):>12}")
30+
return "\n".join(lines)
31+
32+
header = (
33+
f"{'Metric':<25} {'Baseline':>12} {'LiteLLM Proxy':>14} {'honeybeepf-llm':>15} "
34+
f"{'Proxy Overhead':>14} {'eBPF Overhead':>14}"
35+
)
2136
sep = "-" * len(header)
2237
lines = [header, sep]
23-
2438
for row in ROWS:
2539
label, key, fmt = row[0], row[1], row[2]
2640
mult = row[3] if len(row) > 3 else 1
2741
b = baseline.get(key, 0) * mult
2842
p = proxy.get(key, 0) * mult
29-
lines.append(f"{label:<25} {format(b, fmt):>12} {format(p, fmt):>14} {_overhead(key, b, p):>12}")
30-
43+
e = ebpf.get(key, 0) * mult
44+
lines.append(
45+
f"{label:<25} {format(b, fmt):>12} {format(p, fmt):>14} {format(e, fmt):>15} "
46+
f"{_overhead(key, b, p):>14} {_overhead(key, b, e):>14}"
47+
)
3148
return "\n".join(lines)
3249

3350

@@ -40,24 +57,38 @@ def format_markdown(results: dict) -> str:
4057

4158
baseline = data.get("baseline", {})
4259
proxy = data.get("proxy", {})
60+
ebpf = data.get("ebpf")
4361

4462
lines.append(f"### {scenario_name}")
4563
lines.append("")
46-
lines.append("| Metric | Baseline | LiteLLM Proxy | Overhead |")
47-
lines.append("|--------|----------|---------------|----------|")
48-
49-
for row in ROWS:
50-
label, key, fmt = row[0], row[1], row[2]
51-
mult = row[3] if len(row) > 3 else 1
52-
b = baseline.get(key, 0) * mult
53-
p = proxy.get(key, 0) * mult
54-
lines.append(f"| {label} | {format(b, fmt)} | {format(p, fmt)} | {_overhead(key, b, p)} |")
5564

65+
if ebpf is None:
66+
lines.append("| Metric | Baseline | LiteLLM Proxy | Overhead |")
67+
lines.append("|--------|----------|---------------|----------|")
68+
for row in ROWS:
69+
label, key, fmt = row[0], row[1], row[2]
70+
mult = row[3] if len(row) > 3 else 1
71+
b = baseline.get(key, 0) * mult
72+
p = proxy.get(key, 0) * mult
73+
lines.append(f"| {label} | {format(b, fmt)} | {format(p, fmt)} | {_overhead(key, b, p)} |")
74+
else:
75+
lines.append("| Metric | Baseline | LiteLLM Proxy | honeybeepf-llm | Proxy Overhead | eBPF Overhead |")
76+
lines.append("|--------|----------|---------------|----------------|----------------|----------------|")
77+
for row in ROWS:
78+
label, key, fmt = row[0], row[1], row[2]
79+
mult = row[3] if len(row) > 3 else 1
80+
b = baseline.get(key, 0) * mult
81+
p = proxy.get(key, 0) * mult
82+
e = ebpf.get(key, 0) * mult
83+
lines.append(
84+
f"| {label} | {format(b, fmt)} | {format(p, fmt)} | {format(e, fmt)} "
85+
f"| {_overhead(key, b, p)} | {_overhead(key, b, e)} |"
86+
)
5687
lines.append("")
5788

5889
if "kill" in results:
5990
k = results["kill"]
60-
lines.append("### Kill Test")
91+
lines.append("### Kill Test (LiteLLM Proxy only — single-arm, no baseline/eBPF analog)")
6192
lines.append("")
6293
lines.append(f"- Proxy killed mid-load: **{k['errors']}** requests failed ({k['error_rate']:.0%} error rate)")
6394
lines.append("")

0 commit comments

Comments
 (0)