-
Notifications
You must be signed in to change notification settings - Fork 137
Expand file tree
/
Copy pathresults_writer.ts
More file actions
130 lines (116 loc) · 3.83 KB
/
results_writer.ts
File metadata and controls
130 lines (116 loc) · 3.83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
/**
* Results writer for persisting test results to JSON file
* Stores latest result per (agentModel, judgeModel, testId) combination
*/
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
import { dirname } from 'node:path';
import type { EvaluationResult, ResultsDatabase, TestResultRecord } from './output_formatter.js';
/**
 * Compose the lookup key under which a single test result is stored.
 *
 * The three parts are concatenated verbatim with ':' between them, giving
 * "{agentModel}:{judgeModel}:{testId}". No escaping is applied to the parts.
 *
 * @param agentModel - Identifier of the model that acted as the agent.
 * @param judgeModel - Identifier of the model that acted as the judge.
 * @param testId - Identifier of the test case.
 * @returns The composite key string.
 */
export function buildResultKey(agentModel: string, judgeModel: string, testId: string): string {
  const separator = ':';
  return agentModel + separator + judgeModel + separator + testId;
}
/**
 * Load the results database from a JSON file.
 *
 * When the file does not exist, a fresh empty database (version '1.0', no
 * results) is returned instead of failing.
 *
 * The parsed content is validated before it is returned: it must be a plain
 * JSON object with a truthy `version` and a `results` field that is itself a
 * non-null, non-array object. Individual result records are not inspected.
 *
 * @param filePath - Path of the JSON file to read.
 * @returns The parsed database, or an empty database if the file is absent.
 * @throws Error if the file cannot be read, is not valid JSON, or does not
 *   have the expected top-level structure. The message is prefixed with
 *   "Failed to load results database from {filePath}: ".
 */
export function loadResultsDatabase(filePath: string): ResultsDatabase {
  if (!existsSync(filePath)) {
    return {
      version: '1.0',
      results: {},
    };
  }
  try {
    // Keep the parsed value as `unknown` until its shape has been checked.
    const parsed: unknown = JSON.parse(readFileSync(filePath, 'utf-8'));
    const isPlainObject = (value: unknown): value is Record<string, unknown> =>
      typeof value === 'object' && value !== null && !Array.isArray(value);

    // `typeof x === 'object'` alone also accepts arrays (and null), so both the
    // top-level value and `results` are checked with the stricter predicate.
    if (!isPlainObject(parsed) || !parsed.version || !isPlainObject(parsed.results)) {
      throw new Error('Invalid database structure: missing version or results field');
    }
    return parsed as unknown as ResultsDatabase;
  } catch (error) {
    throw new Error(
      `Failed to load results database from ${filePath}: ${error instanceof Error ? error.message : String(error)}`,
    );
  }
}
/**
 * Persist the results database to disk as human-readable JSON.
 *
 * The parent directory of `filePath` is created (recursively) when it does
 * not exist yet. The database is serialized with a 2-space indent and written
 * as UTF-8, replacing any existing file content.
 *
 * @param filePath - Destination path of the JSON file.
 * @param database - Database object to serialize.
 * @throws Error if the directory cannot be created, serialization fails, or
 *   the file cannot be written. The message is prefixed with
 *   "Failed to save results database to {filePath}: ".
 */
export function saveResultsDatabase(filePath: string, database: ResultsDatabase): void {
  try {
    const parentDir = dirname(filePath);
    const parentMissing = !existsSync(parentDir);
    if (parentMissing) {
      mkdirSync(parentDir, { recursive: true });
    }

    // Pretty-print so the stored file stays readable and diff-friendly.
    writeFileSync(filePath, JSON.stringify(database, null, 2), 'utf-8');
  } catch (error) {
    const detail = error instanceof Error ? error.message : String(error);
    throw new Error(`Failed to save results database to ${filePath}: ${detail}`);
  }
}
/**
 * Turn an in-memory evaluation result into the record shape that is stored
 * in the results database.
 *
 * When `result.error` is truthy the record is marked as a failure: the
 * verdict is 'FAIL' and the error text is used both as `reason` and `error`.
 * Otherwise verdict and reason are taken from `result.judgeResult` and
 * `error` is null. The timestamp is the time of conversion, not of the run.
 *
 * @param result - Evaluation outcome for a single test case.
 * @param agentModel - Identifier of the agent model used for the run.
 * @param judgeModel - Identifier of the judge model used for the run.
 * @returns The record to persist for this test.
 */
export function convertEvaluationResultToRecord(
  result: EvaluationResult,
  agentModel: string,
  judgeModel: string,
): TestResultRecord {
  // Resolve the outcome-dependent fields first; the judge result is only
  // consulted when the run did not error out.
  const outcome = result.error
    ? { verdict: 'FAIL' as const, reason: result.error, error: result.error }
    : { verdict: result.judgeResult.verdict, reason: result.judgeResult.reason, error: null };

  // Property order is kept stable because it determines the serialized layout.
  return {
    timestamp: new Date().toISOString(),
    agentModel,
    judgeModel,
    testId: result.testCase.id,
    verdict: outcome.verdict,
    reason: outcome.reason,
    durationMs: result.durationMs,
    turns: result.conversation.totalTurns,
    error: outcome.error,
  };
}
/**
 * Produce a new results database that includes the given evaluation results.
 *
 * Each evaluation is converted to a record and stored under the key built
 * from (agentModel, judgeModel, testId), replacing any existing entry with
 * that key. Entries for tests that are not in `results` are carried over
 * unchanged. The input `database` object itself is not modified; the returned
 * database has its own `results` map (existing records are reused, not copied).
 *
 * @param database - Current database to start from.
 * @param results - Evaluation results of the tests that were run.
 * @param agentModel - Identifier of the agent model used for the run.
 * @param judgeModel - Identifier of the judge model used for the run.
 * @returns A new database containing the merged entries.
 */
export function updateResultsWithEvaluations(
  database: ResultsDatabase,
  results: EvaluationResult[],
  agentModel: string,
  judgeModel: string,
): ResultsDatabase {
  // Build the fresh entries first; a later duplicate testId wins.
  const freshEntries = Object.fromEntries(
    results.map((evaluation) => {
      const record = convertEvaluationResultToRecord(evaluation, agentModel, judgeModel);
      const key = buildResultKey(agentModel, judgeModel, evaluation.testCase.id);
      return [key, record];
    }),
  );

  // Overlay fresh entries on a copy of the existing map.
  return {
    version: database.version,
    results: { ...database.results, ...freshEntries },
  };
}