Skip to content

Commit 48cfb0d

Browse files
Improve robustness of CSV dataset loader (#64)
* Add basic evaluation example script
* Fix typos and improve clarity in docstrings across core modules
* Add Google-style docstrings to BaseAdapter methods
* Format base adapter using ruff
* docs: add instructions for running CI checks locally
* Remove example file unrelated to CI documentation
* Add py.typed marker for type checker support
* Add test for markdown emoji encoding
* Fix test_reporting: correct class usage, fields, and Windows-safe to_markdown
* All tests passing: fixed dependencies and formatting
* Update dependencies / poetry config
* Fix emoji markdown test and align ScenarioRun signature
* Fix reporting tests and update dependencies
* Fix missing required dependencies (jsonschema, scipy)
* Update all files
* Add CSV export support for SuiteResult
* Fix SIM118 linter issue in SuiteResult.to_csv
* Fix Ruff formatting issues in SuiteResult.to_csv
* Fix CSV export: iterate over dict keys correctly and pass Ruff lint
* Fix: SwarmAdapter imports and end_session duration tracking, fully linted
* Format files and remove lint error
* Fix the issue of csv file
* Improve robustness of CSV dataset loader
* Replace parser function
* Format CSV dataset loader

---------

Signed-off-by: Jagriti-student <jagriti7989@gmail.com>
1 parent 23afcf8 commit 48cfb0d

1 file changed

Lines changed: 42 additions & 13 deletions

File tree

src/agentunit/datasets/base.py

Lines changed: 42 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,25 @@ def _loader() -> Iterable[DatasetCase]:
7979
return DatasetSource(name=str(file_path.stem), loader=_loader)
8080

8181

82-
def load_local_csv(path: str | Path) -> DatasetSource:
82+
def _parse_list_field(value: str | None, delimiter: str) -> list[str] | None:
83+
"""Safely parse a delimited list field from CSV."""
84+
if not value or not isinstance(value, str):
85+
return None
86+
87+
if not delimiter:
88+
# Empty delimiter is invalid for str.split
89+
return [value.strip()] if value.strip() else None
90+
91+
parts = [item.strip() for item in value.split(delimiter)]
92+
cleaned = [p for p in parts if p]
93+
return cleaned or None
94+
95+
96+
def load_local_csv(
97+
path: str | Path,
98+
tools_delimiter: str = ";",
99+
context_delimiter: str = "||",
100+
) -> DatasetSource:
83101
file_path = Path(path)
84102
if not file_path.exists():
85103
msg = f"Dataset file not found: {file_path}"
@@ -88,18 +106,29 @@ def load_local_csv(path: str | Path) -> DatasetSource:
88106
def _loader() -> Iterable[DatasetCase]:
89107
with file_path.open(newline="", encoding="utf-8") as fh:
90108
reader = csv.DictReader(fh)
109+
91110
for idx, row in enumerate(reader):
92-
yield DatasetCase(
93-
id=row.get("id") or f"case-{idx}",
94-
query=row["query"],
95-
expected_output=row.get("expected_output"),
96-
tools=row.get("tools", "").split(";") if row.get("tools") else None,
97-
context=row.get("context", "").split("||") if row.get("context") else None,
98-
metadata={
99-
k: v
100-
for k, v in row.items()
101-
if k not in {"id", "query", "expected_output", "tools", "context"}
102-
},
103-
)
111+
try:
112+
yield DatasetCase(
113+
id=row.get("id") or f"case-{idx}",
114+
query=row["query"],
115+
expected_output=row.get("expected_output"),
116+
tools=_parse_list_field(row.get("tools"), tools_delimiter),
117+
context=_parse_list_field(row.get("context"), context_delimiter),
118+
metadata={
119+
k: v
120+
for k, v in row.items()
121+
if k
122+
not in {
123+
"id",
124+
"query",
125+
"expected_output",
126+
"tools",
127+
"context",
128+
}
129+
},
130+
)
131+
except Exception as exc:
132+
raise AgentUnitError(f"Malformed CSV row at index {idx}: {row}") from exc
104133

105134
return DatasetSource(name=str(file_path.stem), loader=_loader)

0 commit comments

Comments
 (0)