Skip to content

Commit e86fe2b

Browse files
authored
Merge pull request #1160 from Kiln-AI/sfierro/skill-refs-fix
References subdirectory fix for skills
2 parents 60dde89 + c67d161 commit e86fe2b

File tree

4 files changed

+177
-74
lines changed

4 files changed

+177
-74
lines changed

libs/core/kiln_ai/datamodel/skill.py

Lines changed: 35 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ def body(self) -> str:
6262
"""Read the markdown body from SKILL.md (content after YAML frontmatter)."""
6363
return _parse_skill_md_body(self.skill_md_raw())
6464

65-
# -- References --
65+
# -- Resources (references & assets) --
6666

6767
def references_dir(self) -> Path:
6868
if self.path is None:
@@ -71,19 +71,41 @@ def references_dir(self) -> Path:
7171
)
7272
return self.path.parent / "references"
7373

74-
def read_reference(self, filename: str) -> str:
75-
"""Read a reference file's content. Raises ValueError if path traversal, FileNotFoundError if missing."""
76-
path = self._validated_reference_path(filename)
74+
def assets_dir(self) -> Path:
75+
if self.path is None:
76+
raise ValueError("Skill must be saved before accessing assets directory")
77+
return self.path.parent / "assets"
78+
79+
def read_reference(self, relative_path: str) -> str:
80+
"""Read a reference file. Raises ValueError for path traversal or non-text, FileNotFoundError if missing."""
81+
return self._read_resource(self.references_dir(), relative_path)
82+
83+
def read_asset(self, relative_path: str) -> str:
84+
"""Read an asset file. Raises ValueError for path traversal or non-text, FileNotFoundError if missing."""
85+
return self._read_resource(self.assets_dir(), relative_path)
86+
87+
def _read_resource(self, base_dir: Path, relative_path: str) -> str:
88+
"""Read a resource file, validating it resolves within base_dir and is readable text."""
89+
if not relative_path or not relative_path.strip():
90+
raise ValueError("Path cannot be empty")
91+
92+
target = base_dir / relative_path
7793
try:
78-
return path.read_text(encoding="utf-8")
79-
except FileNotFoundError:
80-
raise FileNotFoundError(f"Reference file not found: {filename}") from None
94+
resolved = target.resolve()
95+
resolved.relative_to(base_dir.resolve())
96+
except ValueError:
97+
raise ValueError("Path traversal is not allowed") from None
8198

82-
def _validated_reference_path(self, filename: str) -> Path:
83-
_validate_filename(filename)
84-
if not filename.endswith(".md"):
85-
raise ValueError("Reference files must have a .md extension")
86-
return self.references_dir() / filename
99+
try:
100+
return resolved.read_text(encoding="utf-8")
101+
except FileNotFoundError:
102+
raise FileNotFoundError(
103+
f"Resource file not found: {relative_path}"
104+
) from None
105+
except UnicodeDecodeError:
106+
raise ValueError(
107+
f"File is not a readable text file: {relative_path}"
108+
) from None
87109

88110
def save_skill_md(self, body: str) -> None:
89111
"""Write SKILL.md with YAML frontmatter (name, description) + markdown body.
@@ -101,16 +123,7 @@ def save_skill_md(self, body: str) -> None:
101123
content = f"---\n{frontmatter}\n---\n\n{body}"
102124
self.skill_md_path().write_text(content, encoding="utf-8")
103125
self.references_dir().mkdir(exist_ok=True)
104-
105-
106-
def _validate_filename(filename: str) -> None:
107-
"""Reject filenames that are empty, contain path separators, or are traversal components."""
108-
if not filename or not filename.strip():
109-
raise ValueError("Filename cannot be empty")
110-
if "/" in filename or "\\" in filename:
111-
raise ValueError("Filename must not contain path separators")
112-
if filename == "." or filename == "..":
113-
raise ValueError("Filename must not be a path traversal component")
126+
self.assets_dir().mkdir(exist_ok=True)
114127

115128

116129
def _parse_skill_md_body(raw: str) -> str:

libs/core/kiln_ai/datamodel/test_skill.py

Lines changed: 84 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
from kiln_ai.datamodel.skill import (
88
Skill,
99
_parse_skill_md_body,
10-
_validate_filename,
1110
)
1211

1312

@@ -343,42 +342,14 @@ def test_round_trip_description_with_dashes(mock_project):
343342
assert skill.body() == "The body"
344343

345344

346-
# -- Filename validation tests --
347-
348-
349-
class TestValidateFilename:
350-
@pytest.mark.parametrize(
351-
"filename",
352-
[
353-
"../etc/passwd",
354-
"foo/../bar",
355-
"..",
356-
".",
357-
"sub/dir.md",
358-
"back\\slash.md",
359-
"",
360-
" ",
361-
],
362-
)
363-
def test_invalid_filenames(self, filename):
364-
with pytest.raises(ValueError):
365-
_validate_filename(filename)
366-
367-
@pytest.mark.parametrize(
368-
"filename",
369-
["REFERENCE.md", "finance.md", "schema.json", "diagram.png", "a"],
370-
)
371-
def test_valid_filenames(self, filename):
372-
_validate_filename(filename)
373-
374-
375-
# -- References tests --
345+
# -- References & assets tests --
376346

377347

378348
class TestReferences:
379-
def test_save_skill_md_creates_references_dir(self, mock_project):
349+
def test_save_skill_md_creates_dirs(self, mock_project):
380350
skill = save_skill_with_body(mock_project)
381351
assert skill.references_dir().is_dir()
352+
assert skill.assets_dir().is_dir()
382353

383354
def test_read_reference(self, mock_project):
384355
skill = save_skill_with_body(mock_project)
@@ -388,24 +359,94 @@ def test_read_reference(self, mock_project):
388359

389360
def test_read_reference_not_found(self, mock_project):
390361
skill = save_skill_with_body(mock_project)
391-
with pytest.raises(FileNotFoundError, match="Reference file not found"):
362+
with pytest.raises(FileNotFoundError, match="Resource file not found"):
392363
skill.read_reference("missing.md")
393364

394-
@pytest.mark.parametrize("filename", ["../etc/passwd", "sub/dir.md", ".."])
395-
def test_reference_path_traversal(self, mock_project, filename):
365+
@pytest.mark.parametrize("path", ["../etc/passwd", "..", "../../secret.txt"])
366+
def test_reference_path_traversal(self, mock_project, path):
367+
skill = save_skill_with_body(mock_project)
368+
with pytest.raises(ValueError, match="Path traversal"):
369+
skill.read_reference(path)
370+
371+
def test_reference_empty_path(self, mock_project):
372+
skill = save_skill_with_body(mock_project)
373+
with pytest.raises(ValueError, match="Path cannot be empty"):
374+
skill.read_reference("")
375+
with pytest.raises(ValueError, match="Path cannot be empty"):
376+
skill.read_reference(" ")
377+
378+
def test_read_reference_in_subdirectory(self, mock_project):
396379
skill = save_skill_with_body(mock_project)
397-
with pytest.raises(ValueError):
398-
skill.read_reference(filename)
380+
sub_dir = skill.references_dir() / "guides"
381+
sub_dir.mkdir(parents=True, exist_ok=True)
382+
(sub_dir / "style.md").write_text("# Style Guide", encoding="utf-8")
383+
assert skill.read_reference("guides/style.md") == "# Style Guide"
384+
385+
def test_read_reference_in_deeply_nested_subdirectory(self, mock_project):
386+
skill = save_skill_with_body(mock_project)
387+
nested_dir = skill.references_dir() / "a" / "b" / "c"
388+
nested_dir.mkdir(parents=True, exist_ok=True)
389+
(nested_dir / "deep.md").write_text("Deep content", encoding="utf-8")
390+
assert skill.read_reference("a/b/c/deep.md") == "Deep content"
391+
392+
@pytest.mark.parametrize(
393+
"filename,content",
394+
[
395+
("notes.txt", "Plain text notes"),
396+
("data.json", '{"key": "value"}'),
397+
("prices.csv", "item,price\nwidget,9.99"),
398+
("config.yaml", "key: value"),
399+
],
400+
)
401+
def test_non_md_extensions_accepted(self, mock_project, filename, content):
402+
skill = save_skill_with_body(mock_project)
403+
(skill.references_dir() / filename).write_text(content, encoding="utf-8")
404+
assert skill.read_reference(filename) == content
405+
406+
def test_binary_file_rejected(self, mock_project):
407+
skill = save_skill_with_body(mock_project)
408+
(skill.references_dir() / "image.png").write_bytes(b"\x89PNG\r\n\x1a\n\x00")
409+
with pytest.raises(ValueError, match="not a readable text file"):
410+
skill.read_reference("image.png")
399411

400412
def test_references_dir_requires_saved_skill(self):
401413
skill = make_skill()
402414
with pytest.raises(ValueError, match="Skill must be saved"):
403415
skill.references_dir()
404416

405-
@pytest.mark.parametrize(
406-
"filename", ["notes.txt", "data.json", "image.png", "readme"]
407-
)
408-
def test_non_md_extension_rejected(self, mock_project, filename):
417+
418+
class TestAssets:
419+
def test_read_asset(self, mock_project):
420+
skill = save_skill_with_body(mock_project)
421+
(skill.assets_dir() / "template.csv").write_text(
422+
"col1,col2\na,b", encoding="utf-8"
423+
)
424+
assert skill.read_asset("template.csv") == "col1,col2\na,b"
425+
426+
def test_read_asset_in_subdirectory(self, mock_project):
409427
skill = save_skill_with_body(mock_project)
410-
with pytest.raises(ValueError, match=r"\.md extension"):
411-
skill.read_reference(filename)
428+
sub_dir = skill.assets_dir() / "data"
429+
sub_dir.mkdir(parents=True, exist_ok=True)
430+
(sub_dir / "prices.csv").write_text("item,price", encoding="utf-8")
431+
assert skill.read_asset("data/prices.csv") == "item,price"
432+
433+
def test_read_asset_not_found(self, mock_project):
434+
skill = save_skill_with_body(mock_project)
435+
with pytest.raises(FileNotFoundError, match="Resource file not found"):
436+
skill.read_asset("missing.csv")
437+
438+
def test_asset_path_traversal(self, mock_project):
439+
skill = save_skill_with_body(mock_project)
440+
with pytest.raises(ValueError, match="Path traversal"):
441+
skill.read_asset("../etc/passwd")
442+
443+
def test_asset_binary_file_rejected(self, mock_project):
444+
skill = save_skill_with_body(mock_project)
445+
(skill.assets_dir() / "photo.jpg").write_bytes(b"\xff\xd8\xff\xe0\x00\x10JFIF")
446+
with pytest.raises(ValueError, match="not a readable text file"):
447+
skill.read_asset("photo.jpg")
448+
449+
def test_assets_dir_requires_saved_skill(self):
450+
skill = make_skill()
451+
with pytest.raises(ValueError, match="Skill must be saved"):
452+
skill.assets_dir()

libs/core/kiln_ai/tools/skill_tool.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
ToolCallResult,
88
)
99

10-
ALLOWED_RESOURCE_PREFIXES = ("references/",)
10+
ALLOWED_RESOURCE_PREFIXES = ("references/", "assets/")
1111

1212

1313
class SkillTool(KilnToolInterface):
@@ -38,7 +38,7 @@ async def description(self) -> str:
3838
"may help solve the user's task. Calling the tool with a skill name loads that skill's "
3939
"full instructions. If the skill references additional files "
4040
"relevant to the task, load them by passing a 'resource' path "
41-
"(e.g. 'references/filename.md')."
41+
"(e.g. 'references/guide.md', 'references/subdir/notes.txt', or 'assets/template.csv')."
4242
)
4343

4444
async def toolcall_definition(self) -> ToolCallDefinition:
@@ -56,7 +56,7 @@ async def toolcall_definition(self) -> ToolCallDefinition:
5656
},
5757
"resource": {
5858
"type": "string",
59-
"description": "Optional. Path to a specific resource file within the skill (e.g. 'references/REFERENCE.md'). If omitted, returns the skill's main instructions.",
59+
"description": "Optional. Path to a specific resource file within the skill (e.g. 'references/guide.md', 'assets/data.csv'). If omitted, returns the skill's main instructions.",
6060
},
6161
},
6262
"required": ["name"],
@@ -92,25 +92,29 @@ async def run(
9292
return ToolCallResult(output=body)
9393

9494
def _load_resource(self, skill: Skill, resource: str) -> ToolCallResult:
95-
"""Load a resource file from the references/ directory."""
95+
"""Load a resource file from an allowed subdirectory (references/ or assets/)."""
9696
if not any(resource.startswith(p) for p in ALLOWED_RESOURCE_PREFIXES):
9797
return ToolCallResult(
9898
output=f"Error: Resource path must start with one of: {', '.join(ALLOWED_RESOURCE_PREFIXES)}"
9999
)
100100

101-
if ".." in resource:
102-
return ToolCallResult(output="Error: Invalid resource path.")
103-
104101
parts = resource.split("/", 1)
105102
if len(parts) != 2 or not parts[1]:
106103
return ToolCallResult(
107104
output="Error: Resource path must include a filename after the directory prefix."
108105
)
109106

110-
_, filename = parts
107+
prefix, relative_path = parts
111108

112109
try:
113-
content = skill.read_reference(filename)
110+
if prefix == "references":
111+
content = skill.read_reference(relative_path)
112+
elif prefix == "assets":
113+
content = skill.read_asset(relative_path)
114+
else:
115+
return ToolCallResult(
116+
output=f"Error: Unknown resource directory: {prefix}"
117+
)
114118
return ToolCallResult(output=content)
115119
except FileNotFoundError:
116120
return ToolCallResult(output=f"Error: Resource not found: {resource}")

libs/core/kiln_ai/tools/test_skill_tool.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ async def test_description_mentions_resource(self, skill_tool: SkillTool):
7676
desc = await skill_tool.description()
7777
assert "Load an agent skill by name" in desc
7878
assert "resource" in desc
79+
assert "assets/" in desc
7980
assert len(desc) <= 1024
8081

8182
async def test_toolcall_definition_schema(self, skill_tool: SkillTool):
@@ -150,10 +151,42 @@ async def test_load_reference(
150151
)
151152
assert result.output == "# Guide\nReference content."
152153

154+
async def test_load_reference_in_subdirectory(
155+
self, sample_skills: list[Skill], skill_tool: SkillTool
156+
):
157+
sub_dir = sample_skills[0].references_dir() / "guides"
158+
sub_dir.mkdir(parents=True, exist_ok=True)
159+
(sub_dir / "style.md").write_text("# Style Guide", encoding="utf-8")
160+
result = await skill_tool.run(
161+
name="code-review", resource="references/guides/style.md"
162+
)
163+
assert result.output == "# Style Guide"
164+
165+
async def test_load_asset(self, sample_skills: list[Skill], skill_tool: SkillTool):
166+
assets_dir = sample_skills[0].assets_dir()
167+
assets_dir.mkdir(parents=True, exist_ok=True)
168+
(assets_dir / "prices.csv").write_text(
169+
"item,price\nwidget,9.99", encoding="utf-8"
170+
)
171+
result = await skill_tool.run(name="code-review", resource="assets/prices.csv")
172+
assert result.output == "item,price\nwidget,9.99"
173+
174+
async def test_load_asset_in_subdirectory(
175+
self, sample_skills: list[Skill], skill_tool: SkillTool
176+
):
177+
sub_dir = sample_skills[0].assets_dir() / "data"
178+
sub_dir.mkdir(parents=True, exist_ok=True)
179+
(sub_dir / "config.json").write_text('{"key": "val"}', encoding="utf-8")
180+
result = await skill_tool.run(
181+
name="code-review", resource="assets/data/config.json"
182+
)
183+
assert result.output == '{"key": "val"}'
184+
153185
async def test_invalid_prefix(self, skill_tool: SkillTool):
154186
result = await skill_tool.run(name="code-review", resource="secrets/key.txt")
155187
assert "Error" in result.output
156188
assert "references/" in result.output
189+
assert "assets/" in result.output
157190

158191
async def test_path_traversal_blocked(self, skill_tool: SkillTool):
159192
result = await skill_tool.run(
@@ -176,6 +209,18 @@ async def test_without_resource_returns_body(self, skill_tool: SkillTool):
176209
result = await skill_tool.run(name="code-review")
177210
assert result.output == "## Code Review\nCheck for bugs."
178211

212+
async def test_binary_resource_rejected(
213+
self, sample_skills: list[Skill], skill_tool: SkillTool
214+
):
215+
ref_dir = sample_skills[0].references_dir()
216+
ref_dir.mkdir(parents=True, exist_ok=True)
217+
(ref_dir / "image.png").write_bytes(b"\x89PNG\r\n\x1a\n\x00")
218+
result = await skill_tool.run(
219+
name="code-review", resource="references/image.png"
220+
)
221+
assert "Error" in result.output
222+
assert "not a readable text file" in result.output
223+
179224

180225
class TestSkillToolId:
181226
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)