Merge pull request #1160 from Kiln-AI/sfierro/skill-refs-fix

leonardmq · web-flow · commit e86fe2b5aa63 · 2026-03-31T22:23:14.000+08:00
Fix reading skill references from subdirectories; add assets directory support
diff --git a/libs/core/kiln_ai/datamodel/skill.py b/libs/core/kiln_ai/datamodel/skill.py
@@ -62,7 +62,7 @@ def body(self) -> str:
         """Read the markdown body from SKILL.md (content after YAML frontmatter)."""
         return _parse_skill_md_body(self.skill_md_raw())
 
-    # -- References --
+    # -- Resources (references & assets) --
 
     def references_dir(self) -> Path:
         if self.path is None:
@@ -71,19 +71,41 @@ def references_dir(self) -> Path:
             )
         return self.path.parent / "references"
 
-    def read_reference(self, filename: str) -> str:
-        """Read a reference file's content. Raises ValueError if path traversal, FileNotFoundError if missing."""
-        path = self._validated_reference_path(filename)
+    def assets_dir(self) -> Path:
+        if self.path is None:
+            raise ValueError("Skill must be saved before accessing assets directory")
+        return self.path.parent / "assets"
+
+    def read_reference(self, relative_path: str) -> str:
+        """Read a reference file. Raises ValueError for an empty path, path traversal, or non-text content; FileNotFoundError if missing."""
+        return self._read_resource(self.references_dir(), relative_path)
+
+    def read_asset(self, relative_path: str) -> str:
+        """Read an asset file. Raises ValueError for an empty path, path traversal, or non-text content; FileNotFoundError if missing."""
+        return self._read_resource(self.assets_dir(), relative_path)
+
+    def _read_resource(self, base_dir: Path, relative_path: str) -> str:
+        """Read a resource file, validating it resolves within base_dir and is readable text."""
+        if not relative_path or not relative_path.strip():
+            raise ValueError("Path cannot be empty")
+
+        target = base_dir / relative_path
         try:
-            return path.read_text(encoding="utf-8")
-        except FileNotFoundError:
-            raise FileNotFoundError(f"Reference file not found: {filename}") from None
+            resolved = target.resolve()
+            resolved.relative_to(base_dir.resolve())
+        except ValueError:
+            raise ValueError("Path traversal is not allowed") from None
 
-    def _validated_reference_path(self, filename: str) -> Path:
-        _validate_filename(filename)
-        if not filename.endswith(".md"):
-            raise ValueError("Reference files must have a .md extension")
-        return self.references_dir() / filename
+        try:
+            return resolved.read_text(encoding="utf-8")
+        except FileNotFoundError:
+            raise FileNotFoundError(
+                f"Resource file not found: {relative_path}"
+            ) from None
+        except UnicodeDecodeError:
+            raise ValueError(
+                f"File is not a readable text file: {relative_path}"
+            ) from None
 
     def save_skill_md(self, body: str) -> None:
         """Write SKILL.md with YAML frontmatter (name, description) + markdown body.
@@ -101,16 +123,7 @@ def save_skill_md(self, body: str) -> None:
         content = f"---\n{frontmatter}\n---\n\n{body}"
         self.skill_md_path().write_text(content, encoding="utf-8")
         self.references_dir().mkdir(exist_ok=True)
-
-
-def _validate_filename(filename: str) -> None:
-    """Reject filenames that are empty, contain path separators, or are traversal components."""
-    if not filename or not filename.strip():
-        raise ValueError("Filename cannot be empty")
-    if "/" in filename or "\\" in filename:
-        raise ValueError("Filename must not contain path separators")
-    if filename == "." or filename == "..":
-        raise ValueError("Filename must not be a path traversal component")
+        self.assets_dir().mkdir(exist_ok=True)
 
 
 def _parse_skill_md_body(raw: str) -> str:
diff --git a/libs/core/kiln_ai/datamodel/test_skill.py b/libs/core/kiln_ai/datamodel/test_skill.py
@@ -7,7 +7,6 @@
 from kiln_ai.datamodel.skill import (
     Skill,
     _parse_skill_md_body,
-    _validate_filename,
 )
 
 
@@ -343,42 +342,14 @@ def test_round_trip_description_with_dashes(mock_project):
     assert skill.body() == "The body"
 
 
-# -- Filename validation tests --
-
-
-class TestValidateFilename:
-    @pytest.mark.parametrize(
-        "filename",
-        [
-            "../etc/passwd",
-            "foo/../bar",
-            "..",
-            ".",
-            "sub/dir.md",
-            "back\\slash.md",
-            "",
-            "   ",
-        ],
-    )
-    def test_invalid_filenames(self, filename):
-        with pytest.raises(ValueError):
-            _validate_filename(filename)
-
-    @pytest.mark.parametrize(
-        "filename",
-        ["REFERENCE.md", "finance.md", "schema.json", "diagram.png", "a"],
-    )
-    def test_valid_filenames(self, filename):
-        _validate_filename(filename)
-
-
-# -- References tests --
+# -- References & assets tests --
 
 
 class TestReferences:
-    def test_save_skill_md_creates_references_dir(self, mock_project):
+    def test_save_skill_md_creates_dirs(self, mock_project):
         skill = save_skill_with_body(mock_project)
         assert skill.references_dir().is_dir()
+        assert skill.assets_dir().is_dir()
 
     def test_read_reference(self, mock_project):
         skill = save_skill_with_body(mock_project)
@@ -388,24 +359,94 @@ def test_read_reference(self, mock_project):
 
     def test_read_reference_not_found(self, mock_project):
         skill = save_skill_with_body(mock_project)
-        with pytest.raises(FileNotFoundError, match="Reference file not found"):
+        with pytest.raises(FileNotFoundError, match="Resource file not found"):
             skill.read_reference("missing.md")
 
-    @pytest.mark.parametrize("filename", ["../etc/passwd", "sub/dir.md", ".."])
-    def test_reference_path_traversal(self, mock_project, filename):
+    @pytest.mark.parametrize("path", ["../etc/passwd", "..", "../../secret.txt"])
+    def test_reference_path_traversal(self, mock_project, path):
+        skill = save_skill_with_body(mock_project)
+        with pytest.raises(ValueError, match="Path traversal"):
+            skill.read_reference(path)
+
+    def test_reference_empty_path(self, mock_project):
+        skill = save_skill_with_body(mock_project)
+        with pytest.raises(ValueError, match="Path cannot be empty"):
+            skill.read_reference("")
+        with pytest.raises(ValueError, match="Path cannot be empty"):
+            skill.read_reference("   ")
+
+    def test_read_reference_in_subdirectory(self, mock_project):
         skill = save_skill_with_body(mock_project)
-        with pytest.raises(ValueError):
-            skill.read_reference(filename)
+        sub_dir = skill.references_dir() / "guides"
+        sub_dir.mkdir(parents=True, exist_ok=True)
+        (sub_dir / "style.md").write_text("# Style Guide", encoding="utf-8")
+        assert skill.read_reference("guides/style.md") == "# Style Guide"
+
+    def test_read_reference_in_deeply_nested_subdirectory(self, mock_project):
+        skill = save_skill_with_body(mock_project)
+        nested_dir = skill.references_dir() / "a" / "b" / "c"
+        nested_dir.mkdir(parents=True, exist_ok=True)
+        (nested_dir / "deep.md").write_text("Deep content", encoding="utf-8")
+        assert skill.read_reference("a/b/c/deep.md") == "Deep content"
+
+    @pytest.mark.parametrize(
+        "filename,content",
+        [
+            ("notes.txt", "Plain text notes"),
+            ("data.json", '{"key": "value"}'),
+            ("prices.csv", "item,price\nwidget,9.99"),
+            ("config.yaml", "key: value"),
+        ],
+    )
+    def test_non_md_extensions_accepted(self, mock_project, filename, content):
+        skill = save_skill_with_body(mock_project)
+        (skill.references_dir() / filename).write_text(content, encoding="utf-8")
+        assert skill.read_reference(filename) == content
+
+    def test_binary_file_rejected(self, mock_project):
+        skill = save_skill_with_body(mock_project)
+        (skill.references_dir() / "image.png").write_bytes(b"\x89PNG\r\n\x1a\n\x00")
+        with pytest.raises(ValueError, match="not a readable text file"):
+            skill.read_reference("image.png")
 
     def test_references_dir_requires_saved_skill(self):
         skill = make_skill()
         with pytest.raises(ValueError, match="Skill must be saved"):
             skill.references_dir()
 
-    @pytest.mark.parametrize(
-        "filename", ["notes.txt", "data.json", "image.png", "readme"]
-    )
-    def test_non_md_extension_rejected(self, mock_project, filename):
+
+class TestAssets:
+    def test_read_asset(self, mock_project):
+        skill = save_skill_with_body(mock_project)
+        (skill.assets_dir() / "template.csv").write_text(
+            "col1,col2\na,b", encoding="utf-8"
+        )
+        assert skill.read_asset("template.csv") == "col1,col2\na,b"
+
+    def test_read_asset_in_subdirectory(self, mock_project):
         skill = save_skill_with_body(mock_project)
-        with pytest.raises(ValueError, match=r"\.md extension"):
-            skill.read_reference(filename)
+        sub_dir = skill.assets_dir() / "data"
+        sub_dir.mkdir(parents=True, exist_ok=True)
+        (sub_dir / "prices.csv").write_text("item,price", encoding="utf-8")
+        assert skill.read_asset("data/prices.csv") == "item,price"
+
+    def test_read_asset_not_found(self, mock_project):
+        skill = save_skill_with_body(mock_project)
+        with pytest.raises(FileNotFoundError, match="Resource file not found"):
+            skill.read_asset("missing.csv")
+
+    def test_asset_path_traversal(self, mock_project):
+        skill = save_skill_with_body(mock_project)
+        with pytest.raises(ValueError, match="Path traversal"):
+            skill.read_asset("../etc/passwd")
+
+    def test_asset_binary_file_rejected(self, mock_project):
+        skill = save_skill_with_body(mock_project)
+        (skill.assets_dir() / "photo.jpg").write_bytes(b"\xff\xd8\xff\xe0\x00\x10JFIF")
+        with pytest.raises(ValueError, match="not a readable text file"):
+            skill.read_asset("photo.jpg")
+
+    def test_assets_dir_requires_saved_skill(self):
+        skill = make_skill()
+        with pytest.raises(ValueError, match="Skill must be saved"):
+            skill.assets_dir()
diff --git a/libs/core/kiln_ai/tools/skill_tool.py b/libs/core/kiln_ai/tools/skill_tool.py
@@ -7,7 +7,7 @@
     ToolCallResult,
 )
 
-ALLOWED_RESOURCE_PREFIXES = ("references/",)
+ALLOWED_RESOURCE_PREFIXES = ("references/", "assets/")
 
 
 class SkillTool(KilnToolInterface):
@@ -38,7 +38,7 @@ async def description(self) -> str:
             "may help solve the user's task. Calling the tool with a skill name loads that skill's "
             "full instructions. If the skill references additional files "
             "relevant to the task, load them by passing a 'resource' path "
-            "(e.g. 'references/filename.md')."
+            "(e.g. 'references/guide.md', 'references/subdir/notes.txt', or 'assets/template.csv')."
         )
 
     async def toolcall_definition(self) -> ToolCallDefinition:
@@ -56,7 +56,7 @@ async def toolcall_definition(self) -> ToolCallDefinition:
                         },
                         "resource": {
                             "type": "string",
-                            "description": "Optional. Path to a specific resource file within the skill (e.g. 'references/REFERENCE.md'). If omitted, returns the skill's main instructions.",
+                            "description": "Optional. Path to a specific resource file within the skill (e.g. 'references/guide.md', 'assets/data.csv'). If omitted, returns the skill's main instructions.",
                         },
                     },
                     "required": ["name"],
@@ -92,25 +92,29 @@ async def run(
         return ToolCallResult(output=body)
 
     def _load_resource(self, skill: Skill, resource: str) -> ToolCallResult:
-        """Load a resource file from the references/ directory."""
+        """Load a resource file from an allowed subdirectory (references/ or assets/)."""
         if not any(resource.startswith(p) for p in ALLOWED_RESOURCE_PREFIXES):
             return ToolCallResult(
                 output=f"Error: Resource path must start with one of: {', '.join(ALLOWED_RESOURCE_PREFIXES)}"
             )
 
-        if ".." in resource:
-            return ToolCallResult(output="Error: Invalid resource path.")
-
         parts = resource.split("/", 1)
         if len(parts) != 2 or not parts[1]:
             return ToolCallResult(
                 output="Error: Resource path must include a filename after the directory prefix."
             )
 
-        _, filename = parts
+        prefix, relative_path = parts
 
         try:
-            content = skill.read_reference(filename)
+            if prefix == "references":
+                content = skill.read_reference(relative_path)
+            elif prefix == "assets":
+                content = skill.read_asset(relative_path)
+            else:
+                return ToolCallResult(
+                    output=f"Error: Unknown resource directory: {prefix}"
+                )
             return ToolCallResult(output=content)
         except FileNotFoundError:
             return ToolCallResult(output=f"Error: Resource not found: {resource}")
diff --git a/libs/core/kiln_ai/tools/test_skill_tool.py b/libs/core/kiln_ai/tools/test_skill_tool.py
@@ -76,6 +76,7 @@ async def test_description_mentions_resource(self, skill_tool: SkillTool):
         desc = await skill_tool.description()
         assert "Load an agent skill by name" in desc
         assert "resource" in desc
+        assert "assets/" in desc
         assert len(desc) <= 1024
 
     async def test_toolcall_definition_schema(self, skill_tool: SkillTool):
@@ -150,10 +151,42 @@ async def test_load_reference(
         )
         assert result.output == "# Guide\nReference content."
 
+    async def test_load_reference_in_subdirectory(
+        self, sample_skills: list[Skill], skill_tool: SkillTool
+    ):
+        sub_dir = sample_skills[0].references_dir() / "guides"
+        sub_dir.mkdir(parents=True, exist_ok=True)
+        (sub_dir / "style.md").write_text("# Style Guide", encoding="utf-8")
+        result = await skill_tool.run(
+            name="code-review", resource="references/guides/style.md"
+        )
+        assert result.output == "# Style Guide"
+
+    async def test_load_asset(self, sample_skills: list[Skill], skill_tool: SkillTool):
+        assets_dir = sample_skills[0].assets_dir()
+        assets_dir.mkdir(parents=True, exist_ok=True)
+        (assets_dir / "prices.csv").write_text(
+            "item,price\nwidget,9.99", encoding="utf-8"
+        )
+        result = await skill_tool.run(name="code-review", resource="assets/prices.csv")
+        assert result.output == "item,price\nwidget,9.99"
+
+    async def test_load_asset_in_subdirectory(
+        self, sample_skills: list[Skill], skill_tool: SkillTool
+    ):
+        sub_dir = sample_skills[0].assets_dir() / "data"
+        sub_dir.mkdir(parents=True, exist_ok=True)
+        (sub_dir / "config.json").write_text('{"key": "val"}', encoding="utf-8")
+        result = await skill_tool.run(
+            name="code-review", resource="assets/data/config.json"
+        )
+        assert result.output == '{"key": "val"}'
+
     async def test_invalid_prefix(self, skill_tool: SkillTool):
         result = await skill_tool.run(name="code-review", resource="secrets/key.txt")
         assert "Error" in result.output
         assert "references/" in result.output
+        assert "assets/" in result.output
 
     async def test_path_traversal_blocked(self, skill_tool: SkillTool):
         result = await skill_tool.run(
@@ -176,6 +209,18 @@ async def test_without_resource_returns_body(self, skill_tool: SkillTool):
         result = await skill_tool.run(name="code-review")
         assert result.output == "## Code Review\nCheck for bugs."
 
+    async def test_binary_resource_rejected(
+        self, sample_skills: list[Skill], skill_tool: SkillTool
+    ):
+        ref_dir = sample_skills[0].references_dir()
+        ref_dir.mkdir(parents=True, exist_ok=True)
+        (ref_dir / "image.png").write_bytes(b"\x89PNG\r\n\x1a\n\x00")
+        result = await skill_tool.run(
+            name="code-review", resource="references/image.png"
+        )
+        assert "Error" in result.output
+        assert "not a readable text file" in result.output
+
 
 class TestSkillToolId:
     @pytest.mark.parametrize(