traceloop
diff --git a/‎.prettierignore‎
Lines changed: 2 additions & 1 deletion b/‎.prettierignore‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎package.json‎
Lines changed: 6 additions & 1 deletion b/‎package.json‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎packages/sample-app/package.json‎
Lines changed: 1 addition & 0 deletions b/‎packages/sample-app/package.json‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎packages/sample-app/src/sample_security_experiment.ts‎
Lines changed: 172 additions & 0 deletions b/‎packages/sample-app/src/sample_security_experiment.ts‎
Lines changed: 172 additions & 0 deletions
diff --git a/‎packages/traceloop-sdk/src/lib/client/evaluator/evaluator.ts‎
Lines changed: 12 additions & 2 deletions b/‎packages/traceloop-sdk/src/lib/client/evaluator/evaluator.ts‎
Lines changed: 12 additions & 2 deletions
diff --git a/‎packages/traceloop-sdk/src/lib/client/experiment/experiment.ts‎
Lines changed: 0 additions & 9 deletions b/‎packages/traceloop-sdk/src/lib/client/experiment/experiment.ts‎
Lines changed: 0 additions & 9 deletions
diff --git a/‎packages/traceloop-sdk/src/lib/generated/evaluators/index.ts‎
Lines changed: 22 additions & 0 deletions b/‎packages/traceloop-sdk/src/lib/generated/evaluators/index.ts‎
Lines changed: 22 additions & 0 deletions
@@ -1,2 +1,3 @@
 pnpm-lock.yaml
-pnpm-workspace.yaml
+pnpm-workspace.yaml
+**/generated/**
@@ -4,10 +4,15 @@
   "license": "Apache-2.0",
   "scripts": {
     "build:all": "pnpm nx run-many -t build",
-    "build:affected": "pnpm nx affected -t build"
+    "build:affected": "pnpm nx affected -t build",
+    "generate:evaluator-models": "./scripts/generate-models.sh"
   },
   "private": true,
   "devDependencies": {
+    "@apidevtools/swagger-parser": "^10.1.0",
+    "@types/node": "^24.0.15",
+    "openapi-typescript": "^7.4.0",
+    "ts-node": "^10.9.2",
     "@commitlint/cli": "^19.8.1",
     "@commitlint/config-conventional": "^19.8.1",
     "@eslint/eslintrc": "^3.3.1",
 
@@ -40,6 +40,7 @@
     "run:sample_generate": "npm run build && node dist/src/test_generate_only.js",
     "run:sample_experiment": "npm run build && node dist/src/sample_experiment.js",
     "run:github_experiment": "npm run build && node dist/src/sample_github_experiment.js",
+    "run:security_experiment": "npm run build && node dist/src/sample_security_experiment.js",
     "run:mcp": "npm run build && node dist/src/sample_mcp.js",
     "run:mcp:real": "npm run build && node dist/src/sample_mcp_real.js",
     "run:mcp:working": "npm run build && node dist/src/sample_mcp_working.js",
 
@@ -0,0 +1,172 @@
+/**
+ * Security Evaluators Experiment
+ *
+ * This example demonstrates Traceloop's security evaluators:
+ * - PII Detector: Identifies personal information exposure
+ * - Secrets Detector: Monitors for credential and key leaks
+ * - Prompt Injection: Detects prompt injection attempts
+ *
+ * These evaluators help ensure your AI applications don't leak sensitive data
+ * or fall victim to prompt injection attacks.
+ */
+
+import OpenAI from "openai";
+import {
+  TraceloopClient,
+  EvaluatorMadeByTraceloop,
+  type TaskInput,
+  type TaskOutput,
+  type TaskResponse,
+} from "@traceloop/node-server-sdk";
+
+// Initialize OpenAI client; the API key is read from the OPENAI_API_KEY environment variable.
+const openai = new OpenAI({
+  apiKey: process.env.OPENAI_API_KEY,
+});
+
+// Initialize Traceloop client; `!` on TRACELOOP_API_KEY is a compile-time assertion only (no runtime check), and baseUrl is undefined when TRACELOOP_API_ENDPOINT is unset.
+const traceloop = new TraceloopClient({
+  apiKey: process.env.TRACELOOP_API_KEY!,
+  appName: "security-evaluators-sample",
+  baseUrl: process.env.TRACELOOP_API_ENDPOINT,
+});
+
+/**
+ * Generate a response using OpenAI.
+ *
+ * Sends `prompt` as a single user message to the chat completions API and
+ * returns the text of the first choice.
+ *
+ * @param prompt - Text sent as the content of the user message.
+ * @returns The first choice's message content, or an empty string when the
+ *   response carries no choices or the content is null/empty.
+ */
+async function generateResponse(prompt: string): Promise<string> {
+  const completion = await openai.chat.completions.create({
+    model: "gpt-3.5-turbo",
+    messages: [{ role: "user", content: prompt }],
+    temperature: 0.7,
+    max_tokens: 200,
+  });
+
+  // Indexing an empty `choices` array yields undefined; optional chaining keeps
+  // that case from throwing a TypeError and falls through to the "" default.
+  return completion.choices[0]?.message.content ?? "";
+}
+
+/**
+ * Task function that processes user queries.
+ * Returns text that will be evaluated for security issues.
+ *
+ * @param row - Dataset row; its `query` field is used as the prompt.
+ * @returns An object with `text` (the generated response) and `prompt`
+ *   (the query that produced it).
+ */
+async function securityTask(row: TaskInput): Promise<TaskOutput> {
+  // Narrow at runtime instead of asserting with `as string`: a missing or
+  // non-string `query` becomes an empty prompt rather than being passed
+  // through to the model call under a false type.
+  const userQuery = typeof row.query === "string" ? row.query : "";
+
+  // Generate response
+  const text = await generateResponse(userQuery);
+
+  // Return data for evaluation
+  return {
+    text, // The text to check for PII, secrets, and prompt injection
+    prompt: userQuery, // Required for prompt injection detector
+  };
+}
+
+/**
+ * Run experiment with security evaluators.
+ *
+ * This experiment will evaluate responses for:
+ * 1. PII (Personally Identifiable Information)
+ * 2. Secrets (API keys, passwords, tokens)
+ * 3. Prompt Injection attempts
+ *
+ * Prints an introduction, the configured evaluator names, and then either a
+ * per-task summary (when the result contains `taskResults`) or the
+ * identifiers of the submitted run.
+ */
+async function runSecurityExperiment(): Promise<void> {
+  const heavyRule = "=".repeat(80);
+  const lightRule = "-".repeat(80);
+
+  // First 100 characters of a value's JSON form, followed by "...".
+  // JSON.stringify returns undefined for values it cannot serialise (such as
+  // undefined itself), so default to "" before calling substring.
+  const preview = (value: unknown): string =>
+    `${(JSON.stringify(value) ?? "").substring(0, 100)}...`;
+
+  console.log("\n" + heavyRule);
+  console.log("SECURITY EVALUATORS EXPERIMENT");
+  console.log(heavyRule + "\n");
+
+  console.log(
+    "This experiment will test three critical security evaluators:\n",
+  );
+  console.log(
+    "1. PII Detector - Identifies personal information (names, emails, SSN, etc.)",
+  );
+  console.log(
+    "2. Secrets Detector - Finds API keys, passwords, and credentials",
+  );
+  console.log(
+    "3. Prompt Injection - Detects attempts to manipulate the AI system",
+  );
+  console.log("\n" + lightRule + "\n");
+
+  // Configure security evaluators using the generated factory methods
+  const evaluators = [
+    EvaluatorMadeByTraceloop.piiDetector({ probability_threshold: 0.7 }),
+    EvaluatorMadeByTraceloop.secretsDetector(),
+    EvaluatorMadeByTraceloop.promptInjection({ threshold: 0.6 }),
+  ];
+
+  console.log("Configured evaluators:");
+  evaluators.forEach((e) => console.log(`  - ${e.name}`));
+  console.log("\n" + lightRule + "\n");
+
+  // Run the experiment
+  const result = await traceloop.experiment.run(securityTask, {
+    datasetSlug: "security", // Set a dataset slug that exists in the Traceloop platform
+    datasetVersion: "v1",
+    evaluators,
+    experimentSlug: "security-evaluators-exp",
+    stopOnError: false,
+    waitForResults: true,
+  });
+
+  // Type guard: check if this is a local run result (ExperimentRunResult)
+  if ("taskResults" in result) {
+    const { taskResults, errors, experimentId, runId } = result;
+
+    console.log("\n" + heavyRule);
+    console.log("Security experiment completed!");
+    console.log(`Experiment ID: ${experimentId}`);
+    console.log(`Run ID: ${runId}`);
+    console.log(
+      `Task Results: ${taskResults.length}, Errors: ${errors.length}`,
+    );
+    console.log(heavyRule + "\n");
+
+    // Print results summary
+    if (taskResults.length > 0) {
+      console.log("Results summary:");
+      taskResults.forEach((taskResult: TaskResponse, idx: number) => {
+        console.log(`\nTask ${idx + 1}:`);
+        console.log(`  Input: ${preview(taskResult.input)}`);
+        console.log(`  Output: ${preview(taskResult.output || {})}`);
+        if (taskResult.error) {
+          console.log(`  Error: ${taskResult.error}`);
+        }
+      });
+    }
+
+    if (errors.length > 0) {
+      console.log("\nErrors:");
+      errors.forEach((error: string, idx: number) => {
+        console.log(`  ${idx + 1}. ${error}`);
+      });
+    }
+  } else {
+    // GitHub Actions result
+    console.log("\n" + heavyRule);
+    console.log("Experiment submitted to GitHub Actions!");
+    console.log(`Experiment ID: ${result.experimentId}`);
+    console.log(`Experiment Slug: ${result.experimentSlug}`);
+    console.log(`Run ID: ${result.runId}`);
+    console.log(heavyRule + "\n");
+  }
+}
+
+/**
+ * Main entry point.
+ *
+ * Prints a heading and runs the security experiment. Any error thrown by the
+ * run is logged and the process exits with status 1.
+ */
+async function main(): Promise<void> {
+  console.log("\nSecurity Evaluators Experiment\n");
+
+  try {
+    await runSecurityExperiment();
+  } catch (error) {
+    console.error("Experiment failed:", error);
+    process.exit(1);
+  }
+}
+
+// main() handles its own failures, so the returned promise is intentionally
+// not awaited; `void` makes that explicit instead of leaving it floating.
+void main();
@@ -66,26 +66,36 @@ export class Evaluator extends BaseDatasetEntity {
       throw new Error("experimentId, evaluator, and taskResult are required");
     }
 
-    // Handle both string and object evaluator types
+    // Handle string, EvaluatorWithVersion, and EvaluatorWithConfig types
     const evaluatorName =
       typeof evaluator === "string" ? evaluator : evaluator.name;
     const evaluatorVersion =
       typeof evaluator === "string" ? undefined : evaluator.version;
+    // Extract config if present (EvaluatorWithConfig type)
+    const evaluatorConfig =
+      typeof evaluator === "object" && "config" in evaluator
+        ? evaluator.config
+        : undefined;
 
     if (!evaluatorName) {
       throw new Error("evaluator name is required");
     }
 
     const inputSchemaMapping = this.createInputSchemaMapping(taskResult);
 
-    const payload = {
+    const payload: Record<string, unknown> = {
       experiment_id: experimentId,
       experiment_run_id: experimentRunId,
       evaluator_version: evaluatorVersion,
       task_id: taskId,
       input_schema_mapping: inputSchemaMapping,
     };
 
+    // Add evaluator config if present
+    if (evaluatorConfig && Object.keys(evaluatorConfig).length > 0) {
+      payload.evaluator_config = evaluatorConfig;
+    }
+
     const response = await this.client.post(
       `/v2/evaluators/slug/${evaluatorName}/execute`,
       payload,
 
@@ -329,15 +329,6 @@ export class Experiment {
               `Evaluator at index ${index} must have a valid non-empty name`,
             );
           }
-          if (
-            !evaluator.version ||
-            typeof evaluator.version !== "string" ||
-            !evaluator.version.trim()
-          ) {
-            throw new Error(
-              `Evaluator at index ${index} must have a valid non-empty version`,
-            );
-          }
         }
       });
     }
 
@@ -0,0 +1,22 @@
+// Auto-generated - DO NOT EDIT
+// Regenerate with: pnpm generate:evaluator-models
+
+export {
+  EVALUATOR_SLUGS,
+  EVALUATOR_SCHEMAS,
+  getEvaluatorSchema,
+  isValidEvaluatorSlug,
+} from './registry';
+
+export type { EvaluatorSlug, EvaluatorSchema } from './registry';
+
+export {
+  EvaluatorMadeByTraceloop,
+  createEvaluator,
+  validateEvaluatorInput,
+  getAvailableEvaluatorSlugs,
+  getEvaluatorSchemaInfo,
+} from './mbt-evaluators';
+
+// Re-export config types
+export type * from './mbt-evaluators';
Original file line number	Diff line number	Diff line change
`@@ -329,15 +329,6 @@ export class Experiment {`
`329`	`329`	`Evaluator at index ${index} must have a valid non-empty name`,
`330`	`330`	`);`
`331`	`331`	`}`
`332`		`- if (`
`333`		`- !evaluator.version \|\|`
`334`		`- typeof evaluator.version !== "string" \|\|`
`335`		`- !evaluator.version.trim()`
`336`		`- ) {`
`337`		`- throw new Error(`
`338`		- `Evaluator at index ${index} must have a valid non-empty version`,
`339`		`- );`
`340`		`- }`
`341`	`332`	`}`
`342`	`333`	`});`
`343`	`334`	`}`