2929from kiln_ai .datamodel .task_output import normalize_rating
3030from kiln_ai .utils .name_generator import generate_memorable_name
3131from kiln_server .task_api import task_from_id
32+ from kiln_server .utils .agent_checks .policy import (
33+ ALLOW_AGENT ,
34+ DENY_AGENT ,
35+ agent_policy_require_approval ,
36+ )
3237from pydantic import BaseModel , Field
3338
3439from .correlation_calculator import (
@@ -439,6 +444,7 @@ def connect_evals_api(app: FastAPI):
439444 "/api/projects/{project_id}/tasks/{task_id}/create_evaluator" ,
440445 summary = "Create Evaluator" ,
441446 tags = ["Evals" ],
447+ openapi_extra = ALLOW_AGENT ,
442448 )
443449 async def create_evaluator (
444450 project_id : Annotated [
@@ -469,6 +475,7 @@ async def create_evaluator(
469475 "/api/projects/{project_id}/tasks/{task_id}/run_configs" ,
470476 summary = "List Run Configs" ,
471477 tags = ["Run Configs" ],
478+ openapi_extra = ALLOW_AGENT ,
472479 )
473480 async def get_run_configs (
474481 project_id : Annotated [
@@ -485,6 +492,7 @@ async def get_run_configs(
485492 "/api/projects/{project_id}/tasks/{task_id}/evals/{eval_id}" ,
486493 summary = "Get Eval" ,
487494 tags = ["Evals" ],
495+ openapi_extra = ALLOW_AGENT ,
488496 )
489497 async def get_eval (
490498 project_id : Annotated [
@@ -502,6 +510,7 @@ async def get_eval(
502510 "/api/projects/{project_id}/tasks/{task_id}/evals/{eval_id}" ,
503511 summary = "Delete Eval" ,
504512 tags = ["Evals" ],
513+ openapi_extra = DENY_AGENT ,
505514 )
506515 async def delete_eval (
507516 project_id : Annotated [
@@ -520,6 +529,9 @@ async def delete_eval(
520529 "/api/projects/{project_id}/tasks/{task_id}/evals/{eval_id}" ,
521530 summary = "Update Eval" ,
522531 tags = ["Evals" ],
532+ openapi_extra = agent_policy_require_approval (
533+ "Allow agent to edit eval? Ensure you backup your project before allowing agentic edits."
534+ ),
523535 )
524536 async def update_eval (
525537 project_id : Annotated [
@@ -558,6 +570,7 @@ async def update_eval(
558570 "/api/projects/{project_id}/tasks/{task_id}/evals" ,
559571 summary = "List Evals" ,
560572 tags = ["Evals" ],
573+ openapi_extra = ALLOW_AGENT ,
561574 )
562575 async def get_evals (
563576 project_id : Annotated [
@@ -576,6 +589,7 @@ async def get_evals(
576589 "/api/projects/{project_id}/tasks/{task_id}/evals/{eval_id}/eval_configs" ,
577590 summary = "List Eval Configs" ,
578591 tags = ["Evals" ],
592+ openapi_extra = ALLOW_AGENT ,
579593 )
580594 async def get_eval_configs (
581595 project_id : Annotated [
@@ -594,6 +608,7 @@ async def get_eval_configs(
594608 "/api/projects/{project_id}/tasks/{task_id}/evals/{eval_id}/eval_config/{eval_config_id}" ,
595609 summary = "Get Eval Config" ,
596610 tags = ["Evals" ],
611+ openapi_extra = ALLOW_AGENT ,
597612 )
598613 async def get_eval_config (
599614 project_id : Annotated [
@@ -615,6 +630,7 @@ async def get_eval_config(
615630 "/api/projects/{project_id}/tasks/{task_id}/run_configs" ,
616631 summary = "Create Run Config" ,
617632 tags = ["Run Configs" ],
633+ openapi_extra = ALLOW_AGENT ,
618634 )
619635 async def create_task_run_config (
620636 project_id : Annotated [
@@ -672,6 +688,9 @@ async def create_task_run_config(
672688 "/api/projects/{project_id}/tasks/{task_id}/run_configs/{run_config_id}" ,
673689 summary = "Update Run Config" ,
674690 tags = ["Run Configs" ],
691+ openapi_extra = agent_policy_require_approval (
692+ "Allow agent to edit run config? Ensure you backup your project before allowing agentic edits."
693+ ),
675694 )
676695 async def update_run_config (
677696 project_id : Annotated [
@@ -712,6 +731,7 @@ async def update_run_config(
712731 "/api/projects/{project_id}/tasks/{task_id}/evals/{eval_id}/create_eval_config" ,
713732 summary = "Create Eval Config" ,
714733 tags = ["Evals" ],
734+ openapi_extra = ALLOW_AGENT ,
715735 )
716736 async def create_eval_config (
717737 project_id : Annotated [
@@ -743,6 +763,7 @@ async def create_eval_config(
743763 "/api/projects/{project_id}/tasks/{task_id}/evals/{eval_id}/eval_config/{eval_config_id}/run_comparison" ,
744764 summary = "Run Run Config Comparison" ,
745765 tags = ["Evals" ],
766+ openapi_extra = ALLOW_AGENT ,
746767 )
747768 async def run_eval_config (
748769 project_id : Annotated [
@@ -798,6 +819,7 @@ async def run_eval_config(
798819 "/api/projects/{project_id}/tasks/{task_id}/evals/{eval_id}/set_current_eval_config/{eval_config_id}" ,
799820 summary = "Set Default Eval Config" ,
800821 tags = ["Evals" ],
822+ openapi_extra = ALLOW_AGENT ,
801823 )
802824 async def set_default_eval_config (
803825 project_id : Annotated [
@@ -843,6 +865,7 @@ async def set_default_eval_config(
843865 "/api/projects/{project_id}/tasks/{task_id}/evals/{eval_id}/run_calibration" ,
844866 summary = "Run Calibration" ,
845867 tags = ["Evals" ],
868+ openapi_extra = ALLOW_AGENT ,
846869 )
847870 async def run_eval_config_eval (
848871 project_id : Annotated [
@@ -869,6 +892,7 @@ async def run_eval_config_eval(
869892 "/api/projects/{project_id}/tasks/{task_id}/evals/{eval_id}/eval_config/{eval_config_id}/run_config/{run_config_id}/results" ,
870893 summary = "Get Eval Run Results" ,
871894 tags = ["Evals" ],
895+ openapi_extra = ALLOW_AGENT ,
872896 )
873897 async def get_eval_run_results (
874898 project_id : Annotated [
@@ -906,6 +930,7 @@ async def get_eval_run_results(
906930 "/api/projects/{project_id}/tasks/{task_id}/evals/{eval_id}/progress" ,
907931 summary = "Get Eval Progress" ,
908932 tags = ["Evals" ],
933+ openapi_extra = ALLOW_AGENT ,
909934 )
910935 async def get_eval_progress (
911936 project_id : Annotated [
@@ -965,6 +990,7 @@ async def get_eval_progress(
965990 "/api/projects/{project_id}/tasks/{task_id}/evals/{eval_id}/eval_config/{eval_config_id}/score_summary" ,
966991 summary = "Get Run Config Score Summary" ,
967992 tags = ["Evals" ],
993+ openapi_extra = ALLOW_AGENT ,
968994 )
969995 async def get_eval_config_score_summary (
970996 project_id : Annotated [
@@ -1078,6 +1104,7 @@ async def get_eval_config_score_summary(
10781104 "/api/projects/{project_id}/tasks/{task_id}/evals/{eval_id}/eval_configs_score_summary" ,
10791105 summary = "Get Eval Config Comparison Summary" ,
10801106 tags = ["Evals" ],
1107+ openapi_extra = ALLOW_AGENT ,
10811108 )
10821109 async def get_eval_configs_score_summary (
10831110 project_id : Annotated [
@@ -1224,6 +1251,7 @@ async def get_eval_configs_score_summary(
12241251 "/api/projects/{project_id}/tasks/{task_id}/run_configs/{run_config_id}/eval_scores" ,
12251252 summary = "Get Run Config Eval Scores" ,
12261253 tags = ["Run Configs" ],
1254+ openapi_extra = ALLOW_AGENT ,
12271255 )
12281256 async def get_run_config_eval_scores (
12291257 project_id : Annotated [
0 commit comments