-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathremote_functions.rb
More file actions
executable file
·127 lines (113 loc) · 3.4 KB
/
remote_functions.rb
File metadata and controls
executable file
·127 lines (113 loc) · 3.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/usr/bin/env ruby
# frozen_string_literal: true
# Example: Using remote functions (server-side prompts) in evaluations
#
# This example demonstrates how to:
# 1. Create a remote task function (prompt) on the Braintrust server
# 2. Create a remote scorer function with LLM classifier and choices
# 3. Use both remote task and scorer in Eval.run
#
# Benefits of remote functions:
# - Centralized prompt management
# - Version control for prompts
# - No need to deploy prompt changes with code
# - Consistent prompt execution across environments
# - Remote scorers use choice_scores for deterministic scoring
require "bundler/setup"
require "braintrust"
require "braintrust/eval"
require "braintrust/functions"
# Initialize Braintrust with tracing enabled (default)
# NOTE(review): presumably picks up credentials from the environment
# (e.g. BRAINTRUST_API_KEY) — confirm against the SDK docs.
Braintrust.init
# Every function and experiment created below lives under this project.
project_name = "ruby-sdk-examples"
# First, create the remote functions (task + scorer) on the server.
# In practice you would create these once via the UI or API.
puts "Creating remote functions..."
api = Braintrust::API.new

# Timestamp the slug so repeated runs of this example don't collide.
function_slug = format("food-classifier-%d", Time.now.to_i)

# The task is a server-side chat prompt: a fixed system instruction plus a
# templated user message ({{input}} is filled in from each eval case).
classifier_messages = [
  {
    role: "system",
    content: "You are a food classifier. Classify the input as 'fruit' or 'vegetable'. Return ONLY the classification, nothing else."
  },
  {
    role: "user",
    content: "Classify: {{input}}"
  }
]

api.functions.create(
  project_name: project_name,
  slug: function_slug,
  function_data: {type: "prompt"},
  prompt_data: {
    prompt: {type: "chat", messages: classifier_messages},
    # temperature 0 keeps the classification deterministic.
    options: {model: "gpt-4o-mini", params: {temperature: 0}}
  }
)
puts "Created task function: #{function_slug}"
# Create a remote scorer function (uses LLM classifier with choices).
scorer_slug = format("classification-scorer-%d", Time.now.to_i)

# choice_scores maps each classifier verdict to a fixed numeric score, so
# the LLM's choice is converted to a deterministic result server-side.
scorer_parser = {
  type: "llm_classifier",
  use_cot: true,
  choice_scores: {"correct" => 1.0, "incorrect" => 0.0}
}

# The scorer prompt compares {{expected}} against the task's {{output}}.
scorer_messages = [
  {
    role: "system",
    content: "You are a scorer evaluating food classifications."
  },
  {
    role: "user",
    content: "Expected: {{expected}}\nActual output: {{output}}\n\nDoes the output correctly classify the food? Choose 'correct' if it matches (case-insensitive), otherwise 'incorrect'."
  }
]

api.functions.create(
  project_name: project_name,
  slug: scorer_slug,
  function_data: {type: "prompt"},
  prompt_data: {
    parser: scorer_parser,
    prompt: {type: "chat", messages: scorer_messages},
    options: {model: "gpt-4o-mini", params: {temperature: 0, use_cache: true}}
  }
)
puts "Created scorer function: #{scorer_slug}"
# Now use the remote functions in Eval.run
puts "\nRunning evaluation with remote functions..."

# Resolve a handle to the remote task; execution happens on the server.
task = Braintrust::Functions.task(project: project_name, slug: function_slug)

# Test cases: each pair is (model input, expected classification).
cases = [
  ["apple", "fruit"],
  ["banana", "fruit"],
  ["carrot", "vegetable"],
  ["broccoli", "vegetable"]
].map { |input, expected| {input: input, expected: expected} }

# Run the evaluation.
# Both the task AND scorer execute on the Braintrust server, not locally.
# Scorers can be referenced by slug — they're resolved from the project.
Braintrust::Eval.run(
  project: project_name,
  experiment: "remote-function-demo",
  cases: cases,
  task: task,
  scorers: [scorer_slug]
)

# Flush all spans to ensure they're exported before the process exits.
OpenTelemetry.tracer_provider.shutdown