-
Notifications
You must be signed in to change notification settings - Fork 56
194 lines (171 loc) · 5.67 KB
/
Copy pathwaza-eval.yml
File metadata and controls
194 lines (171 loc) · 5.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
name: Waza Evaluation

on:
  # Manual runs: every input below can be set when the run is started.
  workflow_dispatch:
    inputs:
      eval-yaml:
        description: 'Path to evaluation YAML file'
        type: string
        required: true
        default: 'examples/code-explainer/eval.yaml'
      context-dir:
        description: 'Context directory for fixtures (relative to eval.yaml if not absolute)'
        type: string
        required: false
        default: ''
      verbose:
        description: 'Enable verbose output'
        type: boolean
        required: false
        default: true
      output-file:
        description: 'Output JSON file path for results'
        type: string
        required: false
        default: 'results.json'
      artifact-name:
        description: 'Name for the results artifact'
        type: string
        required: false
        default: 'waza-evaluation-results'

  # Reusable-workflow entry point. Same inputs as the manual trigger, except
  # that eval-yaml has no default and must be supplied by the caller.
  workflow_call:
    inputs:
      eval-yaml:
        description: 'Path to evaluation YAML file'
        type: string
        required: true
      context-dir:
        description: 'Context directory for fixtures (relative to eval.yaml if not absolute)'
        type: string
        required: false
        default: ''
      verbose:
        description: 'Enable verbose output'
        type: boolean
        required: false
        default: true
      output-file:
        description: 'Output JSON file path for results'
        type: string
        required: false
        default: 'results.json'
      artifact-name:
        description: 'Name for the results artifact'
        type: string
        required: false
        default: 'waza-evaluation-results'

  # Pull requests targeting the main branches.
  # Automatic triggers carry no inputs, so the workflow evaluates the default
  # file (examples/code-explainer/eval.yaml). To exercise a specific eval
  # file, use workflow_dispatch or add a dedicated workflow per example.
  pull_request:
    branches:
      - main
      - develop
    paths:
      - 'examples/**/*.yaml'
      - 'examples/**/*.yml'
      - 'skills/**'
      - 'internal/execution/**'
      - 'internal/orchestration/**'
      - 'internal/graders/**'
      - '.github/workflows/waza-eval.yml'

  # Pushes to the main branches, filtered by the same paths as pull requests.
  push:
    branches:
      - main
      - develop
    paths:
      - 'examples/**/*.yaml'
      - 'examples/**/*.yml'
      - 'skills/**'
      - 'internal/execution/**'
      - 'internal/orchestration/**'
      - 'internal/graders/**'
      - '.github/workflows/waza-eval.yml'

jobs:
  # Builds the waza CLI from ./cmd/waza, runs a single evaluation file with
  # it, and uploads the result file plus transcripts/ as a workflow artifact.
  run-evaluation:
    name: Run Waza Evaluation
    runs-on: ubuntu-latest
    permissions:
      contents: read
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4
        with:
          lfs: true

      - name: Setup Go Environment
        uses: actions/setup-go@v5
        with:
          # Quoted so the version is read as a string, not a float.
          go-version: '1.26'
          cache-dependency-path: go.sum

      - name: Download Dependencies
        run: go mod download

      - name: Build Waza Binary
        run: |
          go build -v -o waza ./cmd/waza
          chmod +x ./waza

      - name: Verify Binary
        run: ./waza --version

      # Resolves which eval file to run and exposes it as the step output
      # `eval-file`.
      - name: Determine Eval File
        id: eval-file
        env:
          # Passed through the environment instead of being expanded inside
          # the script text, so the value can never be parsed as shell code.
          EVAL_INPUT: ${{ inputs.eval-yaml }}
        run: |
          # Use input if provided (from workflow_call or workflow_dispatch)
          # Otherwise use a default for PR/push triggers
          if [ -n "$EVAL_INPUT" ]; then
            EVAL_FILE="$EVAL_INPUT"
          else
            EVAL_FILE="examples/code-explainer/eval.yaml"
          fi
          echo "eval-file=$EVAL_FILE" >> "$GITHUB_OUTPUT"
          echo "Using eval file: $EVAL_FILE"

      # Resolves the fixtures directory and exposes it as the step output
      # `context-dir`.
      - name: Determine Context Directory
        id: context-dir
        env:
          EVAL_FILE: ${{ steps.eval-file.outputs.eval-file }}
          CONTEXT_INPUT: ${{ inputs.context-dir }}
        run: |
          # If context-dir input is provided and not empty, use it as given
          if [ -n "$CONTEXT_INPUT" ]; then
            CONTEXT_DIR="$CONTEXT_INPUT"
          else
            # Default to the fixtures directory next to the eval file
            CONTEXT_DIR="$(dirname "$EVAL_FILE")/fixtures"
          fi
          echo "context-dir=$CONTEXT_DIR" >> "$GITHUB_OUTPUT"
          echo "Using context directory: $CONTEXT_DIR"

      - name: Run Evaluation
        id: run-eval
        env:
          EVAL_FILE: ${{ steps.eval-file.outputs.eval-file }}
          CONTEXT_DIR: ${{ steps.context-dir.outputs.context-dir }}
          # Empty on push/pull_request (no inputs), so verbose stays off there.
          # NOTE(review): the declared input default is true; confirm that
          # quiet automatic runs are intended.
          VERBOSE: ${{ inputs.verbose }}
          # Same fallback as the upload step below, so automatic runs also
          # write the file that the artifact upload looks for.
          OUTPUT_FILE: ${{ inputs.output-file || 'results.json' }}
        run: |
          # Build command with optional flags using an argument array
          CMD=(./waza run "$EVAL_FILE")

          # Add context-dir only if the directory exists
          if [ -d "$CONTEXT_DIR" ]; then
            CMD+=("--context-dir" "$CONTEXT_DIR")
          fi

          # Add verbose flag if enabled
          if [ "$VERBOSE" = "true" ]; then
            CMD+=("--verbose")
          fi

          # Add output file
          if [ -n "$OUTPUT_FILE" ]; then
            CMD+=("--output" "$OUTPUT_FILE")
          fi

          echo "Running: ${CMD[*]}"
          "${CMD[@]}"

      # always() keeps the upload running after a failed evaluation so the
      # results and transcripts are still available.
      - name: Upload Results Artifact
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: ${{ inputs.artifact-name || 'waza-evaluation-results' }}
          path: |
            ${{ inputs.output-file || 'results.json' }}
            transcripts/
          retention-days: 30
          if-no-files-found: warn

      # failure() is required here: without a status-check function the
      # condition is implicitly combined with success(), which would skip
      # this step in exactly the case it is meant to report.
      - name: Check Evaluation Status
        if: ${{ failure() && steps.run-eval.outcome == 'failure' }}
        run: |
          echo "::error::Waza evaluation failed"
          exit 1