fix(graders): make prompt-grader timeout configurable via WAZA_PROMPT_GRADER_TIMEOUT #110
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Waza Evaluation

on:
  # Manual runs: every option can be set when the run is started.
  workflow_dispatch:
    inputs:
      eval-yaml:
        description: 'Path to evaluation YAML file'
        required: true
        type: string
        default: 'examples/code-explainer/eval.yaml'
      context-dir:
        description: 'Context directory for fixtures (relative to eval.yaml if not absolute)'
        required: false
        type: string
        default: ''
      verbose:
        description: 'Enable verbose output'
        required: false
        type: boolean
        default: true
      output-file:
        description: 'Output JSON file path for results'
        required: false
        type: string
        default: 'results.json'
      artifact-name:
        description: 'Name for the results artifact'
        required: false
        type: string
        default: 'waza-evaluation-results'

  # Reusable-workflow entry point: same inputs as the manual trigger, except
  # that eval-yaml has no default and must be supplied by the caller.
  workflow_call:
    inputs:
      eval-yaml:
        description: 'Path to evaluation YAML file'
        required: true
        type: string
      context-dir:
        description: 'Context directory for fixtures (relative to eval.yaml if not absolute)'
        required: false
        type: string
        default: ''
      verbose:
        description: 'Enable verbose output'
        required: false
        type: boolean
        default: true
      output-file:
        description: 'Output JSON file path for results'
        required: false
        type: string
        default: 'results.json'
      artifact-name:
        description: 'Name for the results artifact'
        required: false
        type: string
        default: 'waza-evaluation-results'

  # Pull requests targeting the main branches.
  # These automatic runs carry no inputs, so the job falls back to the default
  # eval file (examples/code-explainer/eval.yaml). To exercise a different
  # eval file, start a manual run or add a dedicated workflow for it.
  pull_request:
    branches:
      - main
      - develop
    paths:
      - 'examples/**/*.yaml'
      - 'examples/**/*.yml'
      - 'skills/**'
      - 'internal/execution/**'
      - 'internal/orchestration/**'
      - 'internal/graders/**'
      - '.github/workflows/waza-eval.yml'

  # Pushes to the main branches, filtered by the same paths as pull requests.
  push:
    branches:
      - main
      - develop
    paths:
      - 'examples/**/*.yaml'
      - 'examples/**/*.yml'
      - 'skills/**'
      - 'internal/execution/**'
      - 'internal/orchestration/**'
      - 'internal/graders/**'
      - '.github/workflows/waza-eval.yml'
jobs:
  # Builds the waza binary from source, runs one evaluation file with it, and
  # uploads the results (plus transcripts/) as a workflow artifact.
  run-evaluation:
    name: Run Waza Evaluation
    runs-on: ubuntu-latest
    permissions:
      contents: read
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4
        with:
          lfs: true

      - name: Setup Go Environment
        uses: actions/setup-go@v5
        with:
          # Quoted so the version is read as a string, not a float.
          go-version: '1.26'
          cache-dependency-path: go.sum

      - name: Download Dependencies
        run: go mod download

      - name: Build Waza Binary
        run: |
          go build -v -o waza ./cmd/waza
          chmod +x ./waza

      - name: Verify Binary
        run: ./waza --version

      # Inputs reach the scripts below through `env:` rather than being
      # interpolated into the script text, so a crafted input value cannot
      # inject shell commands.
      - name: Determine Eval File
        id: eval-file
        env:
          EVAL_INPUT: ${{ inputs.eval-yaml }}
        run: |
          # Use the input when provided (workflow_call or workflow_dispatch);
          # push/pull_request runs have no inputs and use the default.
          if [ -n "$EVAL_INPUT" ]; then
            EVAL_FILE="$EVAL_INPUT"
          else
            EVAL_FILE="examples/code-explainer/eval.yaml"
          fi
          echo "eval-file=$EVAL_FILE" >> "$GITHUB_OUTPUT"
          echo "Using eval file: $EVAL_FILE"

      - name: Determine Context Directory
        id: context-dir
        env:
          EVAL_FILE: ${{ steps.eval-file.outputs.eval-file }}
          CONTEXT_INPUT: ${{ inputs.context-dir }}
        run: |
          # An explicit, non-empty context-dir input wins; otherwise default
          # to the "fixtures" directory next to the eval file.
          if [ -n "$CONTEXT_INPUT" ]; then
            CONTEXT_DIR="$CONTEXT_INPUT"
          else
            CONTEXT_DIR="$(dirname "$EVAL_FILE")/fixtures"
          fi
          echo "context-dir=$CONTEXT_DIR" >> "$GITHUB_OUTPUT"
          echo "Using context directory: $CONTEXT_DIR"

      - name: Run Evaluation
        id: run-eval
        env:
          EVAL_FILE: ${{ steps.eval-file.outputs.eval-file }}
          CONTEXT_DIR: ${{ steps.context-dir.outputs.context-dir }}
          VERBOSE_INPUT: ${{ inputs.verbose }}
          # Same fallback as the upload step, so push/pull_request runs
          # (which have no inputs) still write the file that gets uploaded.
          OUTPUT_FILE: ${{ inputs.output-file || 'results.json' }}
        run: |
          # inputs.verbose renders as an empty string when the run has no
          # inputs; fall back to the declared input default (true). An
          # explicit "false" is preserved.
          VERBOSE="${VERBOSE_INPUT:-true}"

          # Build the command as an argument array so values containing
          # spaces are passed through intact.
          CMD=(./waza run "$EVAL_FILE")

          # Only pass --context-dir when the directory actually exists.
          if [ -d "$CONTEXT_DIR" ]; then
            CMD+=("--context-dir" "$CONTEXT_DIR")
          fi

          if [ "$VERBOSE" = "true" ]; then
            CMD+=("--verbose")
          fi

          # OUTPUT_FILE is never empty because of the fallback above.
          CMD+=("--output" "$OUTPUT_FILE")

          echo "Running: ${CMD[*]}"
          "${CMD[@]}"

      - name: Upload Results Artifact
        # Upload even when the evaluation step failed.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: ${{ inputs.artifact-name || 'waza-evaluation-results' }}
          path: |
            ${{ inputs.output-file || 'results.json' }}
            transcripts/
          retention-days: 30
          if-no-files-found: warn

      - name: Check Evaluation Status
        # failure() is required: without a status function the condition is
        # implicitly prefixed with success(), so this step would be skipped
        # whenever the evaluation step had failed.
        if: failure() && steps.run-eval.outcome == 'failure'
        run: |
          echo "::error::Waza evaluation failed"
          exit 1