-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathdataset.rb
More file actions
110 lines (92 loc) · 2.64 KB
/
dataset.rb
File metadata and controls
110 lines (92 loc) · 2.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#!/usr/bin/env ruby
# frozen_string_literal: true
# Example: Running an evaluation against a dataset
#
# This example demonstrates:
# 1. Creating a dataset with test cases
# 2. Running an evaluation using the dataset
# 3. Different ways to specify datasets (string, hash with options)
#
# Usage:
# ruby examples/eval/dataset.rb
require "bundler/setup"
require "braintrust"
# Initialize the Braintrust SDK before any API or eval call is made
Braintrust.init
# API client created without explicit arguments, so it relies on the SDK's global state
api = Braintrust::API.new
# Shut down the OpenTelemetry tracer provider when the script exits
# (presumably so pending trace data is flushed first — confirm against the SDK docs)
at_exit do
  OpenTelemetry.tracer_provider.shutdown
end
# Project that the dataset and every experiment below are created under
project_name = "ruby-sdk-examples"
# Dataset name carries the current Unix timestamp, so each run uses a new name
dataset_name = "string-transform-#{Time.now.to_i}"
puts "Creating dataset '#{dataset_name}'..."
create_response = api.datasets.create(
  name: dataset_name,
  project_name: project_name,
  description: "Example dataset for string transformation evaluation"
)
# Remember the dataset's ID: it is needed to insert records and for the by-ID example
dataset_id = create_response["dataset"]["id"]
# Test cases to insert into the dataset: each record pairs an input string
# with the output the task is expected to produce for it
test_cases = {
  "hello" => "HELLO",
  "world" => "WORLD",
  "ruby" => "RUBY",
  "braintrust" => "BRAINTRUST"
}.map { |input, expected| {input: input, expected: expected} }
# Send the test cases to the dataset created above, addressed by its ID
api.datasets.insert(
  id: dataset_id,
  events: test_cases
)
# Task under evaluation: takes the record's input (keyword argument) and
# returns it upcased
task = lambda { |input:| input.upcase }
# Scorer named "exact_match" (the name labels it in the results):
# yields 1.0 when the task output equals the expected value, otherwise 0.0
scorer = Braintrust::Scorer.new("exact_match") do |expected:, output:|
  if output == expected
    1.0
  else
    0.0
  end
end
# Example 1: reference the dataset by name only (a plain string)
puts "\n#{"=" * 60}", "Example 1: Dataset as string (same project)", "=" * 60
Braintrust::Eval.run(
  project: project_name,
  experiment: "dataset-eval-string",
  # A bare string is enough here: the dataset is fetched from the same project
  dataset: dataset_name,
  task: task,
  scorers: [scorer]
)
# Example 2: reference the dataset with a hash that names its project explicitly
puts "\n#{"=" * 60}",
  "Example 2: Dataset as hash with explicit project",
  "=" * 60
Braintrust::Eval.run(
  project: project_name,
  experiment: "dataset-eval-hash",
  # :project states explicitly which project the dataset lives in
  dataset: {name: dataset_name, project: project_name},
  task: task,
  scorers: [scorer]
)
# Example 3: reference the dataset by the ID returned when it was created
puts "\n#{"=" * 60}", "Example 3: Dataset by ID", "=" * 60
Braintrust::Eval.run(
  project: project_name,
  experiment: "dataset-eval-id",
  # Fetch by ID instead of by name
  dataset: {
    id: dataset_id
  },
  task: task,
  scorers: [scorer]
)
# Example 4: same dataset, but capped to a limited number of records
puts "\n#{"=" * 60}", "Example 4: Dataset with record limit", "=" * 60
Braintrust::Eval.run(
  project: project_name,
  experiment: "dataset-eval-limit",
  # limit: 2 means only the first 2 records are used
  dataset: {name: dataset_name, project: project_name, limit: 2},
  task: task,
  scorers: [scorer]
)