Skip to content

Commit 711962a

Browse files
Nina Kollman
authored and committed
evaluator slug optional
1 parent 81bc188 commit 711962a

File tree

8 files changed

+49
-33
lines changed

8 files changed

+49
-33
lines changed

packages/sample-app/src/sample_experiment.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -87,16 +87,16 @@ const main = async () => {
8787
datasetSlug: "medical-q",
8888
datasetVersion: "v1",
8989
evaluators: [{ name: "medical_advice" }],
90-
experimentSlug: "medical-advice-exp-ts",
90+
// experimentSlug: "medical-advice-exp-ts",
9191
stopOnError: false,
9292
waitForResults: true,
9393
});
9494

9595
console.log(`✅ Completed refuse advice experiment:`);
96-
console.log(` - Results: ${results1.results.length}`);
96+
console.log(` - Results: ${results1.taskResults.length}`);
9797
console.log(` - Errors: ${results1.errors.length}`);
9898
console.log(` - Experiment ID: ${results1.experimentId}`);
99-
console.log("Results:", results1.results);
99+
console.log("Evaluation Results:", results1.evaluations);
100100

101101
console.log("\n🧪 Running experiment with comprehensive medical info prompt...");
102102

@@ -117,7 +117,7 @@ const main = async () => {
117117
// Compare results
118118
console.log("\n📊 Experiment Comparison:");
119119
console.log("Refuse Advice Strategy:");
120-
results1.results.slice(0, 2).forEach((result: TaskResponse, i: number) => {
120+
results1.taskResults.slice(0, 2).forEach((result: TaskResponse, i: number) => {
121121
console.log(` Sample ${i + 1}:`);
122122
console.log(` Question: ${result.input?.question || 'N/A'}`);
123123
console.log(` Response: ${result.output?.completion?.substring(0, 100) || 'N/A'}...`);

packages/traceloop-sdk/src/lib/client/experiment/experiment.ts

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,15 @@ export class Experiment {
2525
this.datasets = new Datasets(client);
2626
}
2727

28+
/**
29+
* Generate a unique experiment slug
30+
*/
31+
private generateExperimentSlug(): string {
32+
const timestamp = Date.now().toString(36);
33+
const random = Math.random().toString(36).substring(2, 7);
34+
return `exp-${timestamp}${random}`.substring(0, 15);
35+
}
36+
2837
private async handleResponse(response: Response) {
2938
if (!response.ok) {
3039
let errorMessage = `HTTP ${response.status}: ${response.statusText}`;
@@ -66,28 +75,24 @@ export class Experiment {
6675
datasetSlug,
6776
datasetVersion,
6877
evaluators = [],
69-
experimentSlug,
7078
waitForResults = true,
7179
} = options;
7280

73-
// Validate inputs
81+
// When experimentSlug is not provided, fall back to the client-level slug; if that is also unset, generate a random one
82+
let { experimentSlug } = options;
83+
if (!experimentSlug) {
84+
experimentSlug = this.client.experimentSlug || this.generateExperimentSlug();
85+
}
86+
7487
this.validateRunOptions(task, options);
7588

7689
try {
77-
if (!experimentSlug) {
78-
throw new Error('Experiment slug is required'); // TODO nina
79-
}
80-
81-
// 1. Initialize experiment
82-
console.log(`🔧 Step 1: Initializing experiment with slug: ${experimentSlug}`);
8390
const experimentResponse = await this.initializeExperiment({
84-
slug: experimentSlug || 'default-experiment',
91+
slug: experimentSlug,
8592
datasetSlug,
8693
datasetVersion,
8794
});
8895
console.log(`✅ Step 1: Experiment initialized with ID: ${experimentResponse.experiment.id}`);
89-
90-
// 2. Get dataset rows
9196
console.log(`🔧 Step 2: Getting dataset rows for: ${datasetSlug}, version: ${datasetVersion}`);
9297
const rows = await this.getDatasetRows(datasetSlug, datasetVersion);
9398
console.log(`✅ Step 2: Retrieved ${rows.length} rows from dataset`);
@@ -111,10 +116,8 @@ export class Experiment {
111116
timestamp: Date.now()
112117
};
113118

114-
// Add to results array
115119
taskResults.push(taskResponse);
116120

117-
// Create task
118121
const response = await this.createTask(
119122
experimentSlug,
120123
experimentResponse.run.id,
@@ -139,12 +142,14 @@ export class Experiment {
139142
}
140143
}
141144

145+
const evalResults = evaluationResults.map((evaluation) => evaluation.result);
146+
console.log("evalResults", evalResults);
142147
return {
143-
results: taskResults,
148+
taskResults: taskResults,
144149
errors: taskErrors,
145150
experimentId: experimentResponse.experiment.id,
146151
runId: experimentResponse.run.id,
147-
evaluations: evaluationResults
152+
evaluations: evalResults
148153
};
149154

150155
} catch (error) {
@@ -204,7 +209,6 @@ export class Experiment {
204209
return data;
205210
}
206211

207-
208212
/**
209213
* Parse JSONL string into list of {col_name: col_value} dictionaries
210214
* Skips the first line (columns definition)
@@ -231,7 +235,6 @@ export class Experiment {
231235
return rows;
232236
}
233237

234-
235238
/**
236239
* Get dataset rows for experiment execution
237240
*/

packages/traceloop-sdk/src/lib/client/traceloop-client.ts

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,12 @@ export class TraceloopClient {
2323
public appName: string;
2424
private baseUrl: string;
2525
private apiKey: string;
26+
public experimentSlug?: string;
27+
28+
public userFeedback: UserFeedback;
29+
public datasets: Datasets;
30+
public experiment: Experiment;
31+
public evaluator: Evaluator;
2632

2733
/**
2834
* Creates a new instance of the TraceloopClient.
@@ -36,12 +42,13 @@ export class TraceloopClient {
3642
options.baseUrl ||
3743
process.env.TRACELOOP_BASE_URL ||
3844
"https://api.traceloop.com";
39-
}
45+
this.experimentSlug = options.experimentSlug;
4046

41-
userFeedback = new UserFeedback(this);
42-
datasets = new Datasets(this);
43-
experiment = new Experiment(this);
44-
evaluator = new Evaluator(this);
47+
this.userFeedback = new UserFeedback(this);
48+
this.datasets = new Datasets(this);
49+
this.experiment = new Experiment(this);
50+
this.evaluator = new Evaluator(this);
51+
}
4552

4653
async post(path: string, body: Record<string, unknown> | any) {
4754
return await fetch(`${this.baseUrl}${path}`, {

packages/traceloop-sdk/src/lib/configuration/index.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ export const initialize = (options: InitializeOptions = {}) => {
3939
if (!options.appName) {
4040
options.appName = process.env.npm_package_name;
4141
}
42+
if (!options.experimentSlug) {
43+
options.experimentSlug = process.env.TRACELOOP_EXP_SLUG;
44+
}
4245

4346
if (options.traceloopSyncEnabled === undefined) {
4447
if (process.env.TRACELOOP_SYNC_ENABLED !== undefined) {
@@ -96,6 +99,7 @@ export const initialize = (options: InitializeOptions = {}) => {
9699
apiKey: options.apiKey,
97100
baseUrl: options.baseUrl,
98101
appName: options.appName!,
102+
experimentSlug: options.experimentSlug,
99103
});
100104
return _client;
101105
}

packages/traceloop-sdk/src/lib/interfaces/experiment.interface.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,11 @@ export interface TaskResponse {
3131
}
3232

3333
export interface ExperimentRunResult {
34-
results: TaskResponse[];
34+
taskResults: TaskResponse[];
3535
errors: string[];
3636
experimentId?: string;
3737
runId?: string;
38-
evaluations?: ExecutionResponse[];
38+
evaluations?: Record<string, any>[];
3939
}
4040

4141
export interface InitExperimentRequest {

packages/traceloop-sdk/src/lib/interfaces/index.ts

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,3 @@ export * from "./traceloop-client.interface";
55
export * from "./dataset.interface";
66
export * from "./experiment.interface";
77
export * from "./evaluator.interface";
8-
export interface TraceloopClientOptions {
9-
apiKey: string;
10-
appName: string;
11-
baseUrl?: string;
12-
}

packages/traceloop-sdk/src/lib/interfaces/initialize-options.interface.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,4 +135,10 @@ export interface InitializeOptions {
135135
* Defaults to true.
136136
*/
137137
tracingEnabled?: boolean;
138+
139+
/**
140+
* The experiment slug to use when running experiments. Optional.
141+
* Defaults to the TRACELOOP_EXP_SLUG environment variable.
142+
*/
143+
experimentSlug?: string;
138144
}

packages/traceloop-sdk/src/lib/interfaces/traceloop-client.interface.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@ export interface TraceloopClientOptions {
22
apiKey: string;
33
appName: string;
44
baseUrl?: string;
5+
experimentSlug?: string;
56
}

0 commit comments

Comments
 (0)