Skip to content

Commit b87afdd

Browse files
authored
Merge pull request #1197 from Kiln-AI/scosman/chat_tags
API Agent Annotations
2 parents 3bf27aa + 9de599a commit b87afdd

File tree

212 files changed

+3433
-61
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

212 files changed

+3433
-61
lines changed

.github/workflows/check_api_bindings.yml

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,42 @@ jobs:
6060
# Change to the correct directory and run the schema check
6161
cd app/web_ui/src/lib
6262
./check_schema.sh
63+
cd -
64+
65+
# Check agent policy annotations
66+
ANNOTATIONS_DIR="libs/server/kiln_server/utils/agent_checks/annotations"
67+
TEMP_DIR=$(mktemp -d)
68+
69+
echo "Checking for unannotated endpoints..."
70+
uv run python -m kiln_server.utils.agent_checks.dump_annotations \
71+
http://localhost:8757/openapi.json "$TEMP_DIR"
72+
73+
echo "Checking annotation files are up to date..."
74+
DIFF_FAILED=false
75+
for f in "$TEMP_DIR"/*.json; do
76+
filename=$(basename "$f")
77+
if [ ! -f "$ANNOTATIONS_DIR/$filename" ]; then
78+
echo "Missing checked-in annotation: $filename"
79+
DIFF_FAILED=true
80+
elif ! diff -q "$f" "$ANNOTATIONS_DIR/$filename" > /dev/null 2>&1; then
81+
echo "Annotation differs: $filename"
82+
diff -u "$ANNOTATIONS_DIR/$filename" "$f" || true
83+
DIFF_FAILED=true
84+
fi
85+
done
86+
87+
if [ "$DIFF_FAILED" = true ]; then
88+
echo ""
89+
echo -e "\033[31mAgent policy annotations are not up to date.\033[0m"
90+
echo "Run the dump CLI to regenerate:"
91+
echo " uv run python -m kiln_server.utils.agent_checks.dump_annotations http://localhost:8757/openapi.json $ANNOTATIONS_DIR"
92+
rm -rf "$TEMP_DIR"
93+
kill $DEV_SERVER_PID || true
94+
exit 1
95+
fi
96+
97+
echo "Agent policy annotations are up to date."
98+
rm -rf "$TEMP_DIR"
6399
64100
# Stop dev server
65101
kill $DEV_SERVER_PID || true

app/desktop/studio_server/copilot_api.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@
7575
RefineSpecApiOutput,
7676
SubmitAnswersRequest,
7777
)
78+
from kiln_server.utils.agent_checks.policy import ALLOW_AGENT
7879
from pydantic import BaseModel, Field
7980

8081
logger = logging.getLogger(__name__)
@@ -114,7 +115,7 @@ class CreateSpecWithCopilotRequest(BaseModel):
114115

115116

116117
def connect_copilot_api(app: FastAPI):
117-
@app.post("/api/copilot/clarify_spec", tags=["Copilot"])
118+
@app.post("/api/copilot/clarify_spec", tags=["Copilot"], openapi_extra=ALLOW_AGENT)
118119
async def clarify_spec(input: ClarifySpecApiInput) -> ClarifySpecApiOutput:
119120
api_key = get_copilot_api_key()
120121
client = get_authenticated_client(api_key)
@@ -140,7 +141,7 @@ async def clarify_spec(input: ClarifySpecApiInput) -> ClarifySpecApiOutput:
140141
detail="Unknown error.",
141142
)
142143

143-
@app.post("/api/copilot/refine_spec", tags=["Copilot"])
144+
@app.post("/api/copilot/refine_spec", tags=["Copilot"], openapi_extra=ALLOW_AGENT)
144145
async def refine_spec(input: RefineSpecApiInput) -> RefineSpecApiOutput:
145146
api_key = get_copilot_api_key()
146147
client = get_authenticated_client(api_key)
@@ -166,7 +167,9 @@ async def refine_spec(input: RefineSpecApiInput) -> RefineSpecApiOutput:
166167
detail="Unknown error.",
167168
)
168169

169-
@app.post("/api/copilot/generate_batch", tags=["Copilot"])
170+
@app.post(
171+
"/api/copilot/generate_batch", tags=["Copilot"], openapi_extra=ALLOW_AGENT
172+
)
170173
async def generate_batch(input: GenerateBatchApiInput) -> GenerateBatchApiOutput:
171174
api_key = get_copilot_api_key()
172175
client = get_authenticated_client(api_key)
@@ -192,7 +195,7 @@ async def generate_batch(input: GenerateBatchApiInput) -> GenerateBatchApiOutput
192195
detail="Unknown error.",
193196
)
194197

195-
@app.post("/api/copilot/question_spec", tags=["Copilot"])
198+
@app.post("/api/copilot/question_spec", tags=["Copilot"], openapi_extra=ALLOW_AGENT)
196199
async def question_spec(
197200
input: SpecQuestionerApiInput,
198201
) -> QuestionSet:
@@ -220,7 +223,11 @@ async def question_spec(
220223
detail="Unknown error.",
221224
)
222225

223-
@app.post("/api/copilot/refine_spec_with_question_answers", tags=["Copilot"])
226+
@app.post(
227+
"/api/copilot/refine_spec_with_question_answers",
228+
tags=["Copilot"],
229+
openapi_extra=ALLOW_AGENT,
230+
)
224231
async def submit_question_answers(
225232
request: SubmitAnswersRequest,
226233
) -> RefineSpecApiOutput:
@@ -249,6 +256,7 @@ async def submit_question_answers(
249256
@app.post(
250257
"/api/projects/{project_id}/tasks/{task_id}/spec_with_copilot",
251258
tags=["Copilot"],
259+
openapi_extra=ALLOW_AGENT,
252260
)
253261
async def create_spec_with_copilot(
254262
project_id: Annotated[

app/desktop/studio_server/data_gen_api.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
)
2424
from kiln_server.project_api import project_from_id
2525
from kiln_server.task_api import task_from_id
26+
from kiln_server.utils.agent_checks.policy import ALLOW_AGENT
2627
from openai.types.chat import (
2728
ChatCompletionSystemMessageParam,
2829
ChatCompletionUserMessageParam,
@@ -126,6 +127,7 @@ def connect_data_gen_api(app: FastAPI):
126127
"/api/projects/{project_id}/tasks/{task_id}/generate_categories",
127128
summary="Generate Categories",
128129
tags=["Synthetic Data"],
130+
openapi_extra=ALLOW_AGENT,
129131
)
130132
async def generate_categories(
131133
project_id: Annotated[
@@ -170,6 +172,7 @@ async def generate_categories(
170172
"/api/projects/{project_id}/tasks/{task_id}/generate_inputs",
171173
summary="Generate Inputs",
172174
tags=["Synthetic Data"],
175+
openapi_extra=ALLOW_AGENT,
173176
)
174177
async def generate_samples(
175178
project_id: Annotated[
@@ -213,6 +216,7 @@ async def generate_samples(
213216
"/api/projects/{project_id}/tasks/{task_id}/save_sample",
214217
summary="Save Sample",
215218
tags=["Synthetic Data"],
219+
openapi_extra=ALLOW_AGENT,
216220
)
217221
async def save_sample(
218222
project_id: Annotated[
@@ -237,6 +241,7 @@ async def save_sample(
237241
"/api/projects/{project_id}/tasks/{task_id}/generate_sample",
238242
summary="Generate Sample",
239243
tags=["Synthetic Data"],
244+
openapi_extra=ALLOW_AGENT,
240245
)
241246
async def generate_sample(
242247
project_id: Annotated[
@@ -307,6 +312,7 @@ async def generate_sample(
307312
"/api/projects/{project_id}/tasks/{task_id}/generate_qna",
308313
summary="Generate Q&A Pairs",
309314
tags=["Synthetic Data"],
315+
openapi_extra=ALLOW_AGENT,
310316
)
311317
async def generate_qna_pairs(
312318
project_id: Annotated[
@@ -362,6 +368,7 @@ async def generate_qna_pairs(
362368
"/api/projects/{project_id}/tasks/{task_id}/save_qna_pair",
363369
summary="Save Q&A Pair",
364370
tags=["Synthetic Data"],
371+
openapi_extra=ALLOW_AGENT,
365372
)
366373
async def save_qna_pair(
367374
project_id: Annotated[

app/desktop/studio_server/eval_api.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,11 @@
2929
from kiln_ai.datamodel.task_output import normalize_rating
3030
from kiln_ai.utils.name_generator import generate_memorable_name
3131
from kiln_server.task_api import task_from_id
32+
from kiln_server.utils.agent_checks.policy import (
33+
ALLOW_AGENT,
34+
DENY_AGENT,
35+
agent_policy_require_approval,
36+
)
3237
from pydantic import BaseModel, Field
3338

3439
from .correlation_calculator import (
@@ -439,6 +444,7 @@ def connect_evals_api(app: FastAPI):
439444
"/api/projects/{project_id}/tasks/{task_id}/create_evaluator",
440445
summary="Create Evaluator",
441446
tags=["Evals"],
447+
openapi_extra=ALLOW_AGENT,
442448
)
443449
async def create_evaluator(
444450
project_id: Annotated[
@@ -469,6 +475,7 @@ async def create_evaluator(
469475
"/api/projects/{project_id}/tasks/{task_id}/run_configs",
470476
summary="List Run Configs",
471477
tags=["Run Configs"],
478+
openapi_extra=ALLOW_AGENT,
472479
)
473480
async def get_run_configs(
474481
project_id: Annotated[
@@ -485,6 +492,7 @@ async def get_run_configs(
485492
"/api/projects/{project_id}/tasks/{task_id}/evals/{eval_id}",
486493
summary="Get Eval",
487494
tags=["Evals"],
495+
openapi_extra=ALLOW_AGENT,
488496
)
489497
async def get_eval(
490498
project_id: Annotated[
@@ -502,6 +510,7 @@ async def get_eval(
502510
"/api/projects/{project_id}/tasks/{task_id}/evals/{eval_id}",
503511
summary="Delete Eval",
504512
tags=["Evals"],
513+
openapi_extra=DENY_AGENT,
505514
)
506515
async def delete_eval(
507516
project_id: Annotated[
@@ -520,6 +529,9 @@ async def delete_eval(
520529
"/api/projects/{project_id}/tasks/{task_id}/evals/{eval_id}",
521530
summary="Update Eval",
522531
tags=["Evals"],
532+
openapi_extra=agent_policy_require_approval(
533+
"Allow agent to edit eval? Ensure you backup your project before allowing agentic edits."
534+
),
523535
)
524536
async def update_eval(
525537
project_id: Annotated[
@@ -558,6 +570,7 @@ async def update_eval(
558570
"/api/projects/{project_id}/tasks/{task_id}/evals",
559571
summary="List Evals",
560572
tags=["Evals"],
573+
openapi_extra=ALLOW_AGENT,
561574
)
562575
async def get_evals(
563576
project_id: Annotated[
@@ -576,6 +589,7 @@ async def get_evals(
576589
"/api/projects/{project_id}/tasks/{task_id}/evals/{eval_id}/eval_configs",
577590
summary="List Eval Configs",
578591
tags=["Evals"],
592+
openapi_extra=ALLOW_AGENT,
579593
)
580594
async def get_eval_configs(
581595
project_id: Annotated[
@@ -594,6 +608,7 @@ async def get_eval_configs(
594608
"/api/projects/{project_id}/tasks/{task_id}/evals/{eval_id}/eval_config/{eval_config_id}",
595609
summary="Get Eval Config",
596610
tags=["Evals"],
611+
openapi_extra=ALLOW_AGENT,
597612
)
598613
async def get_eval_config(
599614
project_id: Annotated[
@@ -615,6 +630,7 @@ async def get_eval_config(
615630
"/api/projects/{project_id}/tasks/{task_id}/run_configs",
616631
summary="Create Run Config",
617632
tags=["Run Configs"],
633+
openapi_extra=ALLOW_AGENT,
618634
)
619635
async def create_task_run_config(
620636
project_id: Annotated[
@@ -672,6 +688,9 @@ async def create_task_run_config(
672688
"/api/projects/{project_id}/tasks/{task_id}/run_configs/{run_config_id}",
673689
summary="Update Run Config",
674690
tags=["Run Configs"],
691+
openapi_extra=agent_policy_require_approval(
692+
"Allow agent to edit run config? Ensure you backup your project before allowing agentic edits."
693+
),
675694
)
676695
async def update_run_config(
677696
project_id: Annotated[
@@ -712,6 +731,7 @@ async def update_run_config(
712731
"/api/projects/{project_id}/tasks/{task_id}/evals/{eval_id}/create_eval_config",
713732
summary="Create Eval Config",
714733
tags=["Evals"],
734+
openapi_extra=ALLOW_AGENT,
715735
)
716736
async def create_eval_config(
717737
project_id: Annotated[
@@ -743,6 +763,7 @@ async def create_eval_config(
743763
"/api/projects/{project_id}/tasks/{task_id}/evals/{eval_id}/eval_config/{eval_config_id}/run_comparison",
744764
summary="Run Run Config Comparison",
745765
tags=["Evals"],
766+
openapi_extra=ALLOW_AGENT,
746767
)
747768
async def run_eval_config(
748769
project_id: Annotated[
@@ -798,6 +819,7 @@ async def run_eval_config(
798819
"/api/projects/{project_id}/tasks/{task_id}/evals/{eval_id}/set_current_eval_config/{eval_config_id}",
799820
summary="Set Default Eval Config",
800821
tags=["Evals"],
822+
openapi_extra=ALLOW_AGENT,
801823
)
802824
async def set_default_eval_config(
803825
project_id: Annotated[
@@ -843,6 +865,7 @@ async def set_default_eval_config(
843865
"/api/projects/{project_id}/tasks/{task_id}/evals/{eval_id}/run_calibration",
844866
summary="Run Calibration",
845867
tags=["Evals"],
868+
openapi_extra=ALLOW_AGENT,
846869
)
847870
async def run_eval_config_eval(
848871
project_id: Annotated[
@@ -869,6 +892,7 @@ async def run_eval_config_eval(
869892
"/api/projects/{project_id}/tasks/{task_id}/evals/{eval_id}/eval_config/{eval_config_id}/run_config/{run_config_id}/results",
870893
summary="Get Eval Run Results",
871894
tags=["Evals"],
895+
openapi_extra=ALLOW_AGENT,
872896
)
873897
async def get_eval_run_results(
874898
project_id: Annotated[
@@ -906,6 +930,7 @@ async def get_eval_run_results(
906930
"/api/projects/{project_id}/tasks/{task_id}/evals/{eval_id}/progress",
907931
summary="Get Eval Progress",
908932
tags=["Evals"],
933+
openapi_extra=ALLOW_AGENT,
909934
)
910935
async def get_eval_progress(
911936
project_id: Annotated[
@@ -965,6 +990,7 @@ async def get_eval_progress(
965990
"/api/projects/{project_id}/tasks/{task_id}/evals/{eval_id}/eval_config/{eval_config_id}/score_summary",
966991
summary="Get Run Config Score Summary",
967992
tags=["Evals"],
993+
openapi_extra=ALLOW_AGENT,
968994
)
969995
async def get_eval_config_score_summary(
970996
project_id: Annotated[
@@ -1078,6 +1104,7 @@ async def get_eval_config_score_summary(
10781104
"/api/projects/{project_id}/tasks/{task_id}/evals/{eval_id}/eval_configs_score_summary",
10791105
summary="Get Eval Config Comparison Summary",
10801106
tags=["Evals"],
1107+
openapi_extra=ALLOW_AGENT,
10811108
)
10821109
async def get_eval_configs_score_summary(
10831110
project_id: Annotated[
@@ -1224,6 +1251,7 @@ async def get_eval_configs_score_summary(
12241251
"/api/projects/{project_id}/tasks/{task_id}/run_configs/{run_config_id}/eval_scores",
12251252
summary="Get Run Config Eval Scores",
12261253
tags=["Run Configs"],
1254+
openapi_extra=ALLOW_AGENT,
12271255
)
12281256
async def get_run_config_eval_scores(
12291257
project_id: Annotated[

0 commit comments

Comments
 (0)