Skip to content

Commit 45b4b22

Browse files
Add result formatter class for better output readability
1 parent 223478c commit 45b4b22

File tree

1 file changed

+316
-0
lines changed

1 file changed

+316
-0
lines changed

benchmark/ResultFormatter.php

Lines changed: 316 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,316 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace PHPVector\Benchmark;
6+
7+
use PHPVector\HNSW\Config as HNSWConfig;
8+
9+
/**
 * Formats benchmark results in two shapes: a flat list of named metrics
 * (see toGitHubBenchmark()) and a human-readable Markdown report
 * (see toMarkdown()).
 *
 * The class is stateless; every method is static.
 */
final class ResultFormatter
{
    /**
     * Two trailing spaces form a Markdown hard line break. Kept in a constant
     * so the whitespace survives editors that strip trailing spaces.
     */
    private const MD_BREAK = '  ';

    /**
     * Metric key => [label appended to the entry name, unit] for the
     * sub-operations nested under "persistence". Array order is emission order.
     */
    private const PERSISTENCE_METRICS = [
        'throughput_mb_s' => ['MB/s', 'MB/s'],
        'disk_size_mb'    => ['disk size', 'MB'],
        'memory_delta_mb' => ['memory delta', 'MB'],
    ];

    /**
     * Metric key => [label appended to the entry name, unit] for every
     * operation other than "persistence". Array order is emission order.
     */
    private const OPERATION_METRICS = [
        'ops_per_second'  => ['ops/s', 'ops/s'],
        'qps'             => ['QPS', 'queries/s'],
        'memory_delta_mb' => ['memory delta', 'MB'],
    ];

    /**
     * Flattens per-scenario results into a list of {name, unit, value} entries.
     *
     * The "scenario" and "recall" entries of each scenario are skipped. The
     * "persistence" entry is expanded one level (one group of metrics per
     * sub-operation). Entry names are prefixed with "<scenarioKey>/" only when
     * more than one scenario is present. Values are rounded to two decimals.
     *
     * @param array<string, array<string, mixed>> $results
     * @return array<int, array{name: string, unit: string, value: float}>
     */
    public static function toGitHubBenchmark(array $results): array
    {
        // Loop-invariant: decided once instead of once per scenario.
        $usePrefix = count($results) > 1;
        $formatted = [];

        foreach ($results as $scenarioKey => $scenarioResults) {
            $prefix = $usePrefix ? "{$scenarioKey}/" : '';

            foreach ($scenarioResults as $opName => $metrics) {
                // Non-array entries cannot carry any of the known metric keys.
                if ($opName === 'scenario' || $opName === 'recall' || !is_array($metrics)) {
                    continue;
                }

                if ($opName === 'persistence') {
                    foreach ($metrics as $subOp => $subMetrics) {
                        if (is_array($subMetrics)) {
                            array_push(
                                $formatted,
                                ...self::collectMetrics("{$prefix}{$subOp}", $subMetrics, self::PERSISTENCE_METRICS),
                            );
                        }
                    }
                    continue;
                }

                array_push(
                    $formatted,
                    ...self::collectMetrics("{$prefix}{$opName}", $metrics, self::OPERATION_METRICS),
                );
            }
        }

        return $formatted;
    }

    /**
     * Renders the full Markdown report: header, summary table, one detail
     * section per scenario, and a closing methodology note.
     *
     * Each scenario must provide "scenario", "insert" and "vector_search";
     * "text_search", "hybrid_search", "update", "delete", "recall" and
     * "persistence" sections are rendered only when present.
     *
     * @param array<string, array<string, mixed>> $results
     * @param array{k: int, queries: int, recall_samples: int} $options
     */
    public static function toMarkdown(
        array $results,
        HNSWConfig $hnswConfig,
        array $options,
    ): string {
        $k = $options['k'];
        $queries = $options['queries'];
        $recallSamples = $options['recall_samples'];

        $lines = [
            ...self::headerLines($hnswConfig, $k, $queries, $recallSamples),
            ...self::summaryLines($results, $k),
            '',
            '---',
            '',
        ];

        foreach ($results as $r) {
            array_push($lines, ...self::scenarioLines($r, $k, $queries, $recallSamples));
        }

        $lines[] = '*Benchmark methodology follows [VectorDBBench](https://github.com/zilliztech/VectorDBBench): '
            . 'serial QPS, P99 tail latency, and Recall@k against brute-force ground truth on synthetic '
            . 'unit-normalised vectors (reproducible seed).*';
        $lines[] = '';

        return implode("\n", $lines);
    }

    /**
     * Builds the metric entries for one operation.
     *
     * @param string $name Entry name without the "(label)" suffix.
     * @param array<array-key, mixed> $metrics Raw metrics of the operation.
     * @param array<string, array{0: string, 1: string}> $spec Metric key => [label, unit].
     * @return list<array{name: string, unit: string, value: float}>
     */
    private static function collectMetrics(string $name, array $metrics, array $spec): array
    {
        $entries = [];

        foreach ($spec as $key => [$label, $unit]) {
            if (isset($metrics[$key])) {
                $entries[] = [
                    'name'  => "{$name} ({$label})",
                    'unit'  => $unit,
                    'value' => round($metrics[$key], 2),
                ];
            }
        }

        return $entries;
    }

    /**
     * Title plus the blockquote describing the run environment and settings.
     *
     * @return list<string>
     */
    private static function headerLines(HNSWConfig $hnswConfig, int $k, int $queries, int $recallSamples): array
    {
        // Every metadata line except the last ends with a hard break; without
        // it Markdown joins consecutive blockquote lines into one paragraph.
        return [
            '# PHPVector Benchmark Report',
            '',
            sprintf('> **Generated:** %s', date('Y-m-d H:i:s')) . self::MD_BREAK,
            sprintf('> **PHP:** %s', PHP_VERSION) . self::MD_BREAK,
            sprintf('> **OS:** %s %s', PHP_OS_FAMILY, php_uname('m')) . self::MD_BREAK,
            sprintf(
                '> **HNSW config:** M=%d efConstruction=%d efSearch=%d distance=%s',
                $hnswConfig->M,
                $hnswConfig->efConstruction,
                $hnswConfig->efSearch,
                $hnswConfig->distance->name,
            ) . self::MD_BREAK,
            sprintf(
                '> **Queries:** %s per scenario top-%d %s recall samples',
                number_format($queries),
                $k,
                number_format($recallSamples),
            ),
            '',
        ];
    }

    /**
     * The "Summary" heading and its table, one row per scenario.
     *
     * The "Text QPS" and "Recall@k" columns appear only when at least one
     * scenario has the corresponding section; scenarios lacking it show "-".
     *
     * @param array<string, array<string, mixed>> $results
     * @return list<string>
     */
    private static function summaryLines(array $results, int $k): array
    {
        $hasTextSearch = false;
        $hasRecall = false;
        foreach ($results as $r) {
            $hasTextSearch = $hasTextSearch || isset($r['text_search']);
            $hasRecall = $hasRecall || isset($r['recall']);
        }

        $headers = ['Scenario', 'Vectors', 'Dims', 'Build time', 'Insert/s', 'Vector QPS', 'P99 ms'];
        if ($hasTextSearch) {
            $headers[] = 'Text QPS';
        }
        if ($hasRecall) {
            $headers[] = "Recall@{$k}";
        }

        $lines = [
            '## Summary',
            '',
            '| ' . implode(' | ', $headers) . ' |',
            '|' . implode('|', array_fill(0, count($headers), '---')) . '|',
        ];

        foreach ($results as $r) {
            $s = $r['scenario'];
            $row = [
                "**{$s['label']}** - {$s['desc']}",
                number_format($s['n']),
                (string) $s['dims'],
                self::fmtTime($r['insert']['total_time_s']),
                number_format((int) $r['insert']['ops_per_second']),
                number_format((int) $r['vector_search']['qps']),
                number_format($r['vector_search']['latency_p99_ms'], 1),
            ];

            if ($hasTextSearch) {
                $row[] = isset($r['text_search'])
                    ? number_format((int) $r['text_search']['qps'])
                    : '-';
            }
            if ($hasRecall) {
                // Check the specific k, not just the recall section: recall may
                // have been measured for other k values only, in which case a
                // bare lookup would warn and print a misleading "0.0%".
                $row[] = isset($r['recall'][$k])
                    ? number_format($r['recall'][$k] * 100.0, 1) . '%'
                    : '-';
            }

            $lines[] = '| ' . implode(' | ', $row) . ' |';
        }

        return $lines;
    }

    /**
     * The detail section of a single scenario, ending with a "---" rule.
     *
     * @param array<string, mixed> $r Results of one scenario.
     * @return list<string>
     */
    private static function scenarioLines(array $r, int $k, int $queries, int $recallSamples): array
    {
        $s = $r['scenario'];

        $lines = [
            "## {$s['label']} - {$s['desc']}",
            '',
            '### Insert',
            '',
            '| Metric | Value |',
            '|--------|-------|',
            sprintf('| Vectors inserted | %s |', number_format($s['n'])),
            sprintf('| Build time | %s |', self::fmtTime($r['insert']['total_time_s'])),
            sprintf('| Throughput | %s doc/s |', number_format((int) $r['insert']['ops_per_second'])),
            sprintf('| Memory delta | %s |', self::fmtMb($r['insert']['memory_delta_mb'])),
            '',
            ...self::latencyTable(
                sprintf('### Vector search (%s queries, k=%d)', number_format($queries), $k),
                $r['vector_search'],
            ),
        ];

        if (isset($r['text_search'])) {
            array_push($lines, ...self::latencyTable(
                sprintf('### Text search (%s queries)', number_format($queries)),
                $r['text_search'],
            ));
        }
        if (isset($r['hybrid_search'])) {
            array_push($lines, ...self::latencyTable(
                sprintf('### Hybrid search (%s queries)', number_format($queries)),
                $r['hybrid_search'],
            ));
        }
        if (isset($r['update'])) {
            array_push($lines, ...self::operationsTable('### Update', $r['update']));
        }
        if (isset($r['delete'])) {
            array_push($lines, ...self::operationsTable('### Delete', $r['delete']));
        }

        if (isset($r['recall'])) {
            $lines[] = sprintf('### Recall (%s samples)', number_format($recallSamples));
            $lines[] = '';
            $lines[] = '| k | Recall |';
            $lines[] = '|---|--------|';
            foreach ($r['recall'] as $kv => $recall) {
                $lines[] = sprintf('| %d | %.1f%% |', $kv, $recall * 100.0);
            }
            $lines[] = '';
        }

        if (isset($r['persistence'])) {
            $p = $r['persistence'];
            $lines[] = '### Persistence';
            $lines[] = '';
            $lines[] = '| Operation | Disk size | Time | Throughput |';
            $lines[] = '|-----------|-----------|------|------------|';
            $lines[] = sprintf(
                '| `save()` | %s | %s | %.1f MB/s |',
                self::fmtMb($p['save']['disk_size_mb']),
                self::fmtTime($p['save']['total_time_s']),
                $p['save']['throughput_mb_s'],
            );
            // NOTE(review): the open() row shows the disk size recorded for
            // save(), presumably because both refer to the same file - confirm.
            $lines[] = sprintf(
                '| `open()` | %s | %s | %.1f MB/s |',
                self::fmtMb($p['save']['disk_size_mb']),
                self::fmtTime($p['open']['total_time_s']),
                $p['open']['throughput_mb_s'],
            );
            $lines[] = '';
        }

        $lines[] = '---';
        $lines[] = '';

        return $lines;
    }

    /**
     * A QPS / P50 / P95 / P99 table under the given heading, followed by a
     * blank line.
     *
     * @param array<string, mixed> $stats Must contain qps and latency_p50_ms, latency_p95_ms, latency_p99_ms.
     * @return list<string>
     */
    private static function latencyTable(string $heading, array $stats): array
    {
        return [
            $heading,
            '',
            '| Metric | Value |',
            '|--------|-------|',
            sprintf('| QPS | %s |', number_format((int) $stats['qps'])),
            sprintf('| P50 | %.2f ms |', $stats['latency_p50_ms']),
            sprintf('| P95 | %.2f ms |', $stats['latency_p95_ms']),
            sprintf('| P99 | %.2f ms |', $stats['latency_p99_ms']),
            '',
        ];
    }

    /**
     * An operations-count / throughput table under the given heading,
     * followed by a blank line.
     *
     * @param array<string, mixed> $stats Must contain operations and ops_per_second.
     * @return list<string>
     */
    private static function operationsTable(string $heading, array $stats): array
    {
        return [
            $heading,
            '',
            '| Metric | Value |',
            '|--------|-------|',
            sprintf('| Operations | %s |', number_format($stats['operations'])),
            sprintf('| Throughput | %s ops/s |', number_format((int) $stats['ops_per_second'])),
            '',
        ];
    }

    /**
     * Formats a duration given in seconds as "Xm Ys", "N.NN s", "N ms" or "N us".
     *
     * The unit is chosen from the value as it will be displayed: a duration
     * that would round up to 1,000 of the smaller unit is shown in the next
     * larger unit instead (0.9996 s becomes "1.00 s", not "1,000 ms").
     */
    private static function fmtTime(float $s): string
    {
        if ($s >= 60.0) {
            return sprintf('%dm %ds', (int) ($s / 60), (int) fmod($s, 60));
        }
        if (round($s * 1_000) >= 1_000.0) {
            return number_format($s, 2) . ' s';
        }
        if (round($s * 1_000_000) >= 1_000.0) {
            return number_format($s * 1_000, 0) . ' ms';
        }

        return number_format($s * 1_000_000, 0) . ' us';
    }

    /**
     * Formats a size given in megabytes as "N.N MB", or "N.NN GB" once the
     * displayed magnitude reaches 1,024 MB.
     *
     * The threshold is applied to the rounded absolute value so that
     * 1023.96 MB is not printed as "1,024.0 MB" and large negative deltas are
     * promoted to GB as well.
     */
    private static function fmtMb(float $mb): string
    {
        if (round(abs($mb), 1) >= 1_024.0) {
            return number_format($mb / 1_024.0, 2) . ' GB';
        }

        return number_format($mb, 1) . ' MB';
    }
}

0 commit comments

Comments
 (0)