# eval-quality.yml
# Eval Quality Gate
#
# Builds the native module, writes a temporary eval config for an embedding
# provider, and runs `eval run --ci` against a budget file.
#
# Provider selection:
#   * If both EVAL_EMBED_BASE_URL and EVAL_EMBED_API_KEY secrets are set, that
#     custom provider is used with the default budget and the checked-in
#     baseline summary.
#   * If neither is set, the job falls back to GitHub Models, authenticating
#     with the workflow's GITHUB_TOKEN and using the GitHub Models budget
#     (no baseline comparison).
#   * Setting only one of the two secrets is a configuration error.
#
# Security notes:
#   * No `${{ ... }}` expression is expanded inside a `run:` script. Values
#     reach the shell through `env:` so their contents are never parsed as
#     shell source.
#   * Credentials are read from `env:` by the step that needs them; they are
#     not passed through `$GITHUB_OUTPUT`.
name: Eval Quality Gate

on:
  workflow_dispatch:
  schedule:
    # GitHub evaluates schedule cron expressions in UTC: daily at 03:00.
    - cron: "0 3 * * *"

permissions:
  contents: read
  # The GitHub Models fallback uses GITHUB_TOKEN as the provider API key.
  models: read

jobs:
  eval-quality:
    timeout-minutes: 45
    runs-on: ubuntu-latest
    steps:
      # Third-party actions are pinned to full commit SHAs.
      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5

      - name: Setup Node.js
        uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020
        with:
          node-version: '22'
          cache: 'npm'

      - name: Setup Rust
        uses: dtolnay/rust-toolchain@efa25f7f19611383d5b0ccf2d1c8914531636bf9
        with:
          toolchain: stable

      - name: Cache Cargo
        uses: Swatinem/rust-cache@42dc69e1aa15d09112580998cf2ef0119e2e91ae
        with:
          workspaces: native -> target

      - name: Install dependencies
        run: npm ci

      - name: Build native module
        working-directory: native
        run: |
          cargo build --release
          npx napi build --release --platform

      # Decides which provider is in use and publishes only non-secret
      # selection data (provider name, budget path, baseline path) as outputs.
      - name: Resolve eval provider config
        id: eval-provider
        env:
          EVAL_EMBED_BASE_URL: ${{ secrets.EVAL_EMBED_BASE_URL }}
          EVAL_EMBED_API_KEY: ${{ secrets.EVAL_EMBED_API_KEY }}
          GITHUB_TOKEN: ${{ github.token }}
        run: |
          if [ -n "$EVAL_EMBED_BASE_URL" ] || [ -n "$EVAL_EMBED_API_KEY" ]; then
            # A custom provider needs both halves; refuse a partial override.
            if [ -z "$EVAL_EMBED_BASE_URL" ] || [ -z "$EVAL_EMBED_API_KEY" ]; then
              echo "If overriding the default GitHub Models provider, both EVAL_EMBED_BASE_URL and EVAL_EMBED_API_KEY must be set."
              exit 1
            fi
            {
              echo "provider_source=custom-secrets"
              echo "budget_path=benchmarks/budgets/default.json"
              echo "against_path=benchmarks/baselines/eval-baseline-summary.json"
            } >> "$GITHUB_OUTPUT"
          else
            if [ -z "$GITHUB_TOKEN" ]; then
              echo "Missing GitHub Actions token for GitHub Models fallback."
              exit 1
            fi
            {
              echo "provider_source=github-models"
              echo "budget_path=benchmarks/budgets/github-models.json"
              # Empty on purpose: the fallback runs without a baseline.
              echo "against_path="
            } >> "$GITHUB_OUTPUT"
            echo "::notice title=Eval quality gate provider::Using GitHub Models via GITHUB_TOKEN with the GitHub Models CI budget."
          fi

      # Only runs when a baseline path was selected (custom provider).
      - name: Ensure baseline exists for baseline-driven budget
        if: steps.eval-provider.outputs.against_path != ''
        env:
          AGAINST_PATH: ${{ steps.eval-provider.outputs.against_path }}
        run: |
          if [ ! -f "$AGAINST_PATH" ]; then
            echo "Missing baseline summary: $AGAINST_PATH"
            exit 1
          fi

      # Writes .github/eval-quality-config.json. The file contains the API
      # key, so it is created owner-only and removed by the cleanup step.
      - name: Create eval quality config from secrets
        env:
          PROVIDER_SOURCE: ${{ steps.eval-provider.outputs.provider_source }}
          EVAL_EMBED_BASE_URL: ${{ secrets.EVAL_EMBED_BASE_URL }}
          EVAL_EMBED_API_KEY: ${{ secrets.EVAL_EMBED_API_KEY }}
          EVAL_EMBED_MODEL: ${{ secrets.EVAL_EMBED_MODEL }}
          EVAL_EMBED_DIMENSIONS: ${{ secrets.EVAL_EMBED_DIMENSIONS }}
          GITHUB_TOKEN: ${{ github.token }}
        run: |
          # Restrict permissions on the config file created below.
          umask 077

          MODEL="${EVAL_EMBED_MODEL:-text-embedding-3-small}"
          DIMS="${EVAL_EMBED_DIMENSIONS:-1536}"

          # DIMS is emitted as a bare JSON number, so reject anything that is
          # not a positive integer (empty, leading zero, or non-digit).
          case "$DIMS" in
            '' | 0* | *[!0-9]*)
              echo "EVAL_EMBED_DIMENSIONS must be a positive integer."
              exit 1
              ;;
          esac

          if [ "$PROVIDER_SOURCE" = "github-models" ]; then
            BASE_URL="https://models.inference.ai.azure.com"
            API_KEY="$GITHUB_TOKEN"
            # Slower, more patient request settings for the fallback provider.
            EMBED_CONCURRENCY=1
            EMBED_REQUEST_INTERVAL_MS=5000
            EMBED_RETRIES=5
            EMBED_RETRY_DELAY_MS=15000
          else
            BASE_URL="$EVAL_EMBED_BASE_URL"
            API_KEY="$EVAL_EMBED_API_KEY"
            EMBED_CONCURRENCY=3
            EMBED_REQUEST_INTERVAL_MS=1000
            EMBED_RETRIES=3
            EMBED_RETRY_DELAY_MS=1000
          fi

          # jq escapes string values, so quotes or backslashes in a secret
          # cannot corrupt the generated JSON.
          jq -n \
            --arg baseUrl "$BASE_URL" \
            --arg apiKey "$API_KEY" \
            --arg model "$MODEL" \
            --argjson dimensions "$DIMS" \
            --argjson concurrency "$EMBED_CONCURRENCY" \
            --argjson requestIntervalMs "$EMBED_REQUEST_INTERVAL_MS" \
            --argjson retries "$EMBED_RETRIES" \
            --argjson retryDelayMs "$EMBED_RETRY_DELAY_MS" \
            '{
              embeddingProvider: "custom",
              customProvider: {
                baseUrl: $baseUrl,
                apiKey: $apiKey,
                model: $model,
                dimensions: $dimensions,
                timeoutMs: 30000,
                concurrency: $concurrency,
                requestIntervalMs: $requestIntervalMs
              },
              indexing: {
                autoIndex: false,
                watchFiles: false,
                respectGitignore: true,
                semanticOnly: false,
                requireProjectMarker: false,
                retries: $retries,
                retryDelayMs: $retryDelayMs
              },
              search: {
                maxResults: 10,
                minScore: 0,
                hybridWeight: 0.4,
                fusionStrategy: "rrf",
                rrfK: 60,
                rerankTopN: 20,
                enableCrossLanguage: true
              },
              debug: {
                enabled: false,
                logLevel: "info",
                metrics: true
              }
            }' > .github/eval-quality-config.json

      - name: Run eval quality gate (real provider config required)
        env:
          BUDGET_PATH: ${{ steps.eval-provider.outputs.budget_path }}
          AGAINST_PATH: ${{ steps.eval-provider.outputs.against_path }}
        run: |
          ARGS=(eval run --config .github/eval-quality-config.json --reindex --ci --budget "$BUDGET_PATH")
          # --against is only passed when a baseline path was selected.
          if [ -n "$AGAINST_PATH" ]; then
            ARGS+=(--against "$AGAINST_PATH")
          fi
          npx tsx src/cli.ts "${ARGS[@]}"

      # Always remove the config file, since it holds the provider API key.
      - name: Cleanup eval quality config
        if: always()
        run: rm -f .github/eval-quality-config.json