Kiln-AI
diff --git a/app/desktop/studio_server/api_client/kiln_ai_server_client/models/examples_with_feedback_item.py b/app/desktop/studio_server/api_client/kiln_ai_server_client/models/examples_with_feedback_item.py
Lines changed: 11 additions & 11 deletions
diff --git a/app/desktop/studio_server/api_models/copilot_models.py b/app/desktop/studio_server/api_models/copilot_models.py
Lines changed: 1 addition & 1 deletion
diff --git a/app/desktop/studio_server/api_models/test_copilot_models.py b/app/desktop/studio_server/api_models/test_copilot_models.py
Lines changed: 5 additions & 5 deletions
diff --git a/app/desktop/studio_server/utils/copilot_utils.py b/app/desktop/studio_server/utils/copilot_utils.py
Lines changed: 0 additions & 1 deletion
diff --git a/app/desktop/studio_server/utils/test_copilot_utils.py b/app/desktop/studio_server/utils/test_copilot_utils.py
Lines changed: 0 additions & 22 deletions
diff --git a/app/web_ui/src/lib/api_schema.d.ts b/app/web_ui/src/lib/api_schema.d.ts
Lines changed: 165 additions & 12 deletions
diff --git a/app/web_ui/src/routes/(app)/specs/[project_id]/[task_id]/spec_builder/+page.svelte b/app/web_ui/src/routes/(app)/specs/[project_id]/[task_id]/spec_builder/+page.svelte
Lines changed: 1 addition & 1 deletion
@@ -18,14 +18,14 @@ class ExamplesWithFeedbackItem:
         input_ (str):
         output (str):
         fails_specification (bool): Judge's verdict - whether the output fails the Target Specification
-        user_feedback (None | str | Unset): Optional text feedback from the user
+        feedback (None | str | Unset): Optional text feedback from the user
     """
 
     user_agrees_with_judge: bool
     input_: str
     output: str
     fails_specification: bool
-    user_feedback: None | str | Unset = UNSET
+    feedback: None | str | Unset = UNSET
 
     def to_dict(self) -> dict[str, Any]:
         user_agrees_with_judge = self.user_agrees_with_judge
@@ -36,11 +36,11 @@ def to_dict(self) -> dict[str, Any]:
 
         fails_specification = self.fails_specification
 
-        user_feedback: None | str | Unset
-        if isinstance(self.user_feedback, Unset):
-            user_feedback = UNSET
+        feedback: None | str | Unset
+        if isinstance(self.feedback, Unset):
+            feedback = UNSET
         else:
-            user_feedback = self.user_feedback
+            feedback = self.feedback
 
         field_dict: dict[str, Any] = {}
 
@@ -52,8 +52,8 @@ def to_dict(self) -> dict[str, Any]:
                 "fails_specification": fails_specification,
             }
         )
-        if user_feedback is not UNSET:
-            field_dict["user_feedback"] = user_feedback
+        if feedback is not UNSET:
+            field_dict["feedback"] = feedback
 
         return field_dict
 
@@ -68,21 +68,21 @@ def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
 
         fails_specification = d.pop("fails_specification")
 
-        def _parse_user_feedback(data: object) -> None | str | Unset:
+        def _parse_feedback(data: object) -> None | str | Unset:
             if data is None:
                 return data
             if isinstance(data, Unset):
                 return data
             return cast(None | str | Unset, data)
 
-        user_feedback = _parse_user_feedback(d.pop("user_feedback", UNSET))
+        feedback = _parse_feedback(d.pop("feedback", UNSET))
 
         examples_with_feedback_item = cls(
             user_agrees_with_judge=user_agrees_with_judge,
             input_=input_,
             output=output,
             fails_specification=fails_specification,
-            user_feedback=user_feedback,
+            feedback=feedback,
         )
 
         return examples_with_feedback_item
@@ -79,7 +79,7 @@ class ExampleWithFeedbackApi(BaseModel):
     input: str = Field(alias="input")
     output: str
     fails_specification: bool
-    user_feedback: str | None = None
+    feedback: str | None = None
 
 
 class ClarifySpecApiInput(BaseModel):
 
@@ -153,24 +153,24 @@ def test_creates_with_required_fields(self):
         assert example.output == "test output"
         assert example.fails_specification is False
 
-    def test_user_feedback_optional(self):
+    def test_feedback_optional(self):
         example = ExampleWithFeedbackApi(
             user_agrees_with_judge=True,
             input="test input",
             output="test output",
             fails_specification=False,
         )
-        assert example.user_feedback is None
+        assert example.feedback is None
 
-    def test_user_feedback_can_be_set(self):
+    def test_feedback_can_be_set(self):
         example = ExampleWithFeedbackApi(
             user_agrees_with_judge=False,
             input="test input",
             output="test output",
             fails_specification=True,
-            user_feedback="This is wrong because...",
+            feedback="This is wrong because...",
         )
-        assert example.user_feedback == "This is wrong because..."
+        assert example.feedback == "This is wrong because..."
 
 
 class TestClarifySpecApiInput:
 
@@ -207,7 +207,6 @@ def create_task_run_from_reviewed(
                 },
             ),
         ),
-        user_feedback=example.feedback if example.feedback else None,
         tags=tags,
     )
 
 
@@ -225,28 +225,6 @@ def test_creates_task_run_with_pass_fail_rating_type(self):
             == TaskOutputRatingType.pass_fail
         )
 
-    def test_creates_task_run_with_user_feedback(self):
-        example = ReviewedExample(
-            input="test input",
-            output="test output",
-            model_says_meets_spec=True,
-            user_says_meets_spec=False,
-            feedback="This fails because the output is too vague",
-        )
-        task_run = create_task_run_from_reviewed(example, "golden_tag", "My Spec")
-        assert task_run.user_feedback == "This fails because the output is too vague"
-
-    def test_creates_task_run_with_no_user_feedback_when_empty(self):
-        example = ReviewedExample(
-            input="test input",
-            output="test output",
-            model_says_meets_spec=True,
-            user_says_meets_spec=True,
-            feedback="",
-        )
-        task_run = create_task_run_from_reviewed(example, "golden_tag", "My Spec")
-        assert task_run.user_feedback is None
-
 
 class TestCreateDatasetTaskRuns:
     def test_creates_correct_number_of_task_runs(self):
 
@@ -378,6 +378,24 @@ export interface paths {
         patch?: never;
         trace?: never;
     };
+    "/api/projects/{project_id}/tasks/{task_id}/runs/{run_id}/feedback": {
+        parameters: {
+            query?: never;
+            header?: never;
+            path?: never;
+            cookie?: never;
+        };
+        /** List Feedback */
+        get: operations["list_feedback_api_projects__project_id__tasks__task_id__runs__run_id__feedback_get"];
+        put?: never;
+        /** Create Feedback */
+        post: operations["create_feedback_api_projects__project_id__tasks__task_id__runs__run_id__feedback_post"];
+        delete?: never;
+        options?: never;
+        head?: never;
+        patch?: never;
+        trace?: never;
+    };
     "/api/projects/{project_id}/documents/bulk": {
         parameters: {
             query?: never;
@@ -3385,6 +3403,19 @@ export interface components {
             /** @description The properties of the extractor config, specific to the selected extractor_type. */
             properties: components["schemas"]["LitellmExtractorConfigProperties"];
         };
+        /**
+         * CreateFeedbackRequest
+         * @description Request body for creating feedback on a task run.
+         */
+        CreateFeedbackRequest: {
+            /**
+             * Feedback
+             * @description Free-form text feedback on the task run.
+             */
+            feedback: string;
+            /** @description Where this feedback originated. */
+            source: components["schemas"]["FeedbackSource"];
+        };
         /**
          * CreateFinetuneRequest
          * @description Request to create a finetune
@@ -4630,8 +4661,8 @@ export interface components {
             output: string;
             /** Fails Specification */
             fails_specification: boolean;
-            /** User Feedback */
-            user_feedback?: string | null;
+            /** Feedback */
+            feedback?: string | null;
         };
         /**
          * ExternalToolApiDescription
@@ -4950,6 +4981,62 @@ export interface components {
             /** Factually Inaccurate Examples */
             factually_inaccurate_examples: string;
         };
+        /**
+         * Feedback
+         * @description Feedback on a task run.
+         *
+         *     Supports multi-source feedback: different users, automated systems, and
+         *     different locations in the UI can each contribute independent feedback
+         *     entries on the same task run.
+         */
+        Feedback: {
+            /**
+             * V
+             * @description Schema version for migration support.
+             * @default 1
+             */
+            v: number;
+            /**
+             * Id
+             * @description Unique identifier for this record.
+             */
+            id?: string | null;
+            /**
+             * Path
+             * @description File system path where the record is stored.
+             */
+            path?: string | null;
+            /**
+             * Created At
+             * Format: date-time
+             * @description Timestamp when the model was created.
+             */
+            created_at?: string;
+            /**
+             * Created By
+             * @description User ID of the creator.
+             */
+            created_by?: string;
+            /**
+             * Feedback
+             * @description Free-form text feedback on the task run.
+             */
+            feedback: string;
+            /** @description Where this feedback originated, e.g. 'run-page' or 'spec-feedback'. */
+            source: components["schemas"]["FeedbackSource"];
+            /** Model Type */
+            readonly model_type: string;
+        };
+        /**
+         * FeedbackSource
+         * @description Where a piece of feedback originated.
+         *
+         *     This is an append-only enum: new sources can be added freely, but existing
+         *     values must never be removed or renamed so that older persisted data
+         *     continues to load.
+         * @enum {string}
+         */
+        FeedbackSource: "run-page" | "spec-feedback";
         /**
          * FewShotExample
          * @description An input/output example for few-shot prompting.
@@ -8055,11 +8142,6 @@ export interface components {
              * @description Instructions for fixing the output. Should define what is wrong, and how to fix it. Will be used by models for both generating a fixed output, and evaluating future models.
              */
             repair_instructions?: string | null;
-            /**
-             * User Feedback
-             * @description User feedback from the spec review process explaining why the output passes or fails a requirement.
-             */
-            user_feedback?: string | null;
             /** @description A version of the output with issues fixed. This must be a 'fixed' version of the existing output, and not an entirely new output. If you wish to generate an ideal curatorial output for this task unrelated to this output, generate a new TaskOutput with type 'human' instead of using this field. */
             repaired_output?: components["schemas"]["TaskOutput-Input"] | null;
             /**
@@ -8137,11 +8219,6 @@ export interface components {
              * @description Instructions for fixing the output. Should define what is wrong, and how to fix it. Will be used by models for both generating a fixed output, and evaluating future models.
              */
             repair_instructions?: string | null;
-            /**
-             * User Feedback
-             * @description User feedback from the spec review process explaining why the output passes or fails a requirement.
-             */
-            user_feedback?: string | null;
             /** @description A version of the output with issues fixed. This must be a 'fixed' version of the existing output, and not an entirely new output. If you wish to generate an ideal curatorial output for this task unrelated to this output, generate a new TaskOutput with type 'human' instead of using this field. */
             repaired_output?: components["schemas"]["TaskOutput-Output"] | null;
             /**
@@ -9813,6 +9890,82 @@ export interface operations {
             };
         };
     };
+    list_feedback_api_projects__project_id__tasks__task_id__runs__run_id__feedback_get: {
+        parameters: {
+            query?: never;
+            header?: never;
+            path: {
+                /** @description The unique identifier of the project. */
+                project_id: string;
+                /** @description The unique identifier of the task within the project. */
+                task_id: string;
+                /** @description The unique identifier of the task run. */
+                run_id: string;
+            };
+            cookie?: never;
+        };
+        requestBody?: never;
+        responses: {
+            /** @description Successful Response */
+            200: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "application/json": components["schemas"]["Feedback"][];
+                };
+            };
+            /** @description Validation Error */
+            422: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "application/json": components["schemas"]["HTTPValidationError"];
+                };
+            };
+        };
+    };
+    create_feedback_api_projects__project_id__tasks__task_id__runs__run_id__feedback_post: {
+        parameters: {
+            query?: never;
+            header?: never;
+            path: {
+                /** @description The unique identifier of the project. */
+                project_id: string;
+                /** @description The unique identifier of the task within the project. */
+                task_id: string;
+                /** @description The unique identifier of the task run. */
+                run_id: string;
+            };
+            cookie?: never;
+        };
+        requestBody: {
+            content: {
+                "application/json": components["schemas"]["CreateFeedbackRequest"];
+            };
+        };
+        responses: {
+            /** @description Successful Response */
+            200: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "application/json": components["schemas"]["Feedback"];
+                };
+            };
+            /** @description Validation Error */
+            422: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "application/json": components["schemas"]["HTTPValidationError"];
+                };
+            };
+        };
+    };
     create_documents_bulk_api_projects__project_id__documents_bulk_post: {
         parameters: {
             query?: never;
 
@@ -547,7 +547,7 @@
       const examples_with_feedback = currentExamples.map((example) => ({
         user_agrees_with_judge:
           example.model_says_meets_spec === example.user_says_meets_spec,
-        user_feedback: example.feedback,
+        feedback: example.feedback,
         input: example.input,
         output: example.output,
         fails_specification: !example.user_says_meets_spec,
Original file line number	Diff line number	Diff line change
`@@ -207,7 +207,6 @@ def create_task_run_from_reviewed(`
`207`	`207`	`},`
`208`	`208`	`),`
`209`	`209`	`),`
`210`		`- user_feedback=example.feedback if example.feedback else None,`
`211`	`210`	`tags=tags,`
`212`	`211`	`)`
`213`	`212`