# .github/workflows/waza-eval.yml — microsoft/waza (main branch)
name: Waza Evaluation

on:
  # Manual runs; each input below declares a default value.
  workflow_dispatch:
    inputs:
      eval-yaml:
        type: string
        required: true
        default: "examples/code-explainer/eval.yaml"
        description: "Path to evaluation YAML file"
      context-dir:
        type: string
        required: false
        default: ""
        description: "Context directory for fixtures (relative to eval.yaml if not absolute)"
      verbose:
        type: boolean
        required: false
        default: true
        description: "Enable verbose output"
      output-file:
        type: string
        required: false
        default: "results.json"
        description: "Output JSON file path for results"
      artifact-name:
        type: string
        required: false
        default: "waza-evaluation-results"
        description: "Name for the results artifact"

  # Reusable-workflow entry point: other workflows call this one and must
  # supply eval-yaml (it is required and has no default here); the remaining
  # inputs are optional.
  workflow_call:
    inputs:
      eval-yaml:
        type: string
        required: true
        description: "Path to evaluation YAML file"
      context-dir:
        type: string
        required: false
        default: ""
        description: "Context directory for fixtures (relative to eval.yaml if not absolute)"
      verbose:
        type: boolean
        required: false
        default: true
        description: "Enable verbose output"
      output-file:
        type: string
        required: false
        default: "results.json"
        description: "Output JSON file path for results"
      artifact-name:
        type: string
        required: false
        default: "waza-evaluation-results"
        description: "Name for the results artifact"

  # Pull requests targeting main or develop that touch any of the listed paths.
  # Note: automatic runs carry no inputs, so the job evaluates the default
  # file (examples/code-explainer/eval.yaml). To exercise a specific eval
  # file, use workflow_dispatch or add a dedicated workflow per example.
  pull_request:
    branches:
      - main
      - develop
    paths:
      - "examples/**/*.yaml"
      - "examples/**/*.yml"
      - "skills/**"
      - "internal/execution/**"
      - "internal/orchestration/**"
      - "internal/graders/**"
      - ".github/workflows/waza-eval.yml"

  # Pushes to main or develop, filtered by the same set of paths.
  push:
    branches:
      - main
      - develop
    paths:
      - "examples/**/*.yaml"
      - "examples/**/*.yml"
      - "skills/**"
      - "internal/execution/**"
      - "internal/orchestration/**"
      - "internal/graders/**"
      - ".github/workflows/waza-eval.yml"

jobs:
  run-evaluation:
    name: Run Waza Evaluation
    runs-on: ubuntu-latest

    # The job token is limited to read access on repository contents.
    permissions:
      contents: read

    steps:
      # Check out the sources, pulling Git LFS objects as well.
      - name: Checkout Repository
        uses: actions/checkout@v4
        with:
          lfs: true

      # Install the Go toolchain; the dependency cache is keyed on go.sum.
      - name: Setup Go Environment
        uses: actions/setup-go@v5
        with:
          cache-dependency-path: go.sum
          go-version: "1.26"

      - name: Download Dependencies
        run: go mod download

      # Compile the CLI from ./cmd/waza into ./waza at the workspace root.
      - name: Build Waza Binary
        run: |
          go build -v -o waza ./cmd/waza
          chmod +x ./waza

      # Smoke test: the freshly built binary must at least start.
      - name: Verify Binary
        run: ./waza --version

      # Resolve which evaluation file to run and expose it as the step output
      # `eval-file`.
      - name: Determine Eval File
        id: eval-file
        # The input is handed to the script through the environment instead of
        # being expanded into the script text, so its value can never be
        # parsed as shell code (script-injection hardening).
        env:
          EVAL_YAML_INPUT: ${{ inputs.eval-yaml }}
        run: |
          # Use the input when provided (workflow_call or workflow_dispatch);
          # otherwise fall back to the default used for PR/push triggers.
          EVAL_FILE="${EVAL_YAML_INPUT:-examples/code-explainer/eval.yaml}"
          echo "eval-file=$EVAL_FILE" >> "$GITHUB_OUTPUT"
          echo "Using eval file: $EVAL_FILE"

      # Resolve the fixtures directory and expose it as the step output
      # `context-dir`.
      - name: Determine Context Directory
        id: context-dir
        # Both values originate from workflow inputs, so they are passed via
        # the environment rather than expanded into the script text; this
        # keeps their contents from being interpreted as shell code.
        env:
          EVAL_FILE: ${{ steps.eval-file.outputs.eval-file }}
          CONTEXT_INPUT: ${{ inputs.context-dir }}
        run: |
          if [ -n "$CONTEXT_INPUT" ]; then
            # An explicit, non-empty context-dir input wins.
            CONTEXT_DIR="$CONTEXT_INPUT"
          else
            # Default to the fixtures directory next to the eval file.
            EVAL_DIR="$(dirname "$EVAL_FILE")"
            CONTEXT_DIR="$EVAL_DIR/fixtures"
          fi

          echo "context-dir=$CONTEXT_DIR" >> "$GITHUB_OUTPUT"
          echo "Using context directory: $CONTEXT_DIR"

      # Assemble the waza command line from the resolved settings and run it.
      - name: Run Evaluation
        id: run-eval
        # All values are passed through the environment instead of being
        # expanded into the script text, so input contents are never parsed
        # as shell code.
        env:
          EVAL_FILE: ${{ steps.eval-file.outputs.eval-file }}
          CONTEXT_DIR: ${{ steps.context-dir.outputs.context-dir }}
          # Empty when the run carries no inputs (push / pull_request); the
          # verbose flag is then left off.
          VERBOSE: ${{ inputs.verbose }}
          # Same fallback as the upload step, so runs without inputs still
          # write the results file that the artifact upload looks for.
          OUTPUT_FILE: ${{ inputs.output-file || 'results.json' }}
        run: |
          # Build the command as an argument array so paths containing
          # spaces survive intact.
          CMD=(./waza run "$EVAL_FILE")

          # Pass the context directory only if it actually exists.
          if [ -d "$CONTEXT_DIR" ]; then
            CMD+=("--context-dir" "$CONTEXT_DIR")
          fi

          # Add the verbose flag if enabled.
          if [ "$VERBOSE" = "true" ]; then
            CMD+=("--verbose")
          fi

          # OUTPUT_FILE is never empty thanks to the fallback above.
          CMD+=("--output" "$OUTPUT_FILE")

          echo "Running: ${CMD[*]}"
          "${CMD[@]}"

      # Runs even when earlier steps failed, so that whatever results and
      # transcripts exist are still published; missing files only warn.
      - name: Upload Results Artifact
        uses: actions/upload-artifact@v4
        if: always()
        with:
          if-no-files-found: warn
          retention-days: 30
          name: ${{ inputs.artifact-name || 'waza-evaluation-results' }}
          path: |
            ${{ inputs.output-file || 'results.json' }}
            transcripts/

      # Surface a clear error annotation when the evaluation step failed.
      # `failure()` is required: without a status-check function the condition
      # is implicitly combined with `success()`, which would skip this step in
      # exactly the case it is meant to report.
      - name: Check Evaluation Status
        if: failure() && steps.run-eval.outcome == 'failure'
        run: |
          echo "::error::Waza evaluation failed"
          exit 1