Skip to content

Commit b6e0db7

Browse files
committed
test(examples): add pytest suite for all example files
Add comprehensive example validation tests covering: - Marimo apps: syntax + import checks - Jupyter notebooks: JSON validity + cell syntax + imports - YAML configs: parse validation - Data files: CSV/Excel readability Handles Jupyter magic commands (!wget, !pip, etc.) by filtering them before AST parsing to avoid false syntax errors. 93 passed, 1 skipped, 0 failed.
1 parent 8f81dde commit b6e0db7

1 file changed

Lines changed: 334 additions & 0 deletions

File tree

tests/examples/test_examples.py

Lines changed: 334 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,334 @@
1+
"""Test suite for example files.

This module validates all example files in the example/ directory,
including marimo apps, Jupyter notebooks, YAML configs, and data files.
Tests verify syntax validity, structural integrity, and import compatibility
without executing full training/inference workflows.
"""
8+
9+
import ast
10+
import json
11+
from pathlib import Path
12+
13+
import pandas as pd
14+
import pytest
15+
import yaml
16+
17+
# Base path for examples: the "example" directory three levels above this
# file (this file lives at tests/examples/test_examples.py).
EXAMPLE_DIR = Path(__file__).parent.parent.parent / "example"
19+
20+
21+
# ──────────────────────────────────────────────────────────────────────────────
22+
# Helpers
23+
# ──────────────────────────────────────────────────────────────────────────────
24+
def _is_magic_or_comment_only(source: str) -> bool:
    """Return True if a notebook cell contains no real Python code.

    A line is ignored when, after stripping surrounding whitespace, it is
    empty or starts with ``!``, ``%`` or ``#``.  The cell counts as "magic or
    comment only" when every line is ignored, which includes an empty cell.

    Args:
        source: Full text of a notebook cell.

    Returns:
        True when no line remains after ignoring blank, magic and comment
        lines; False otherwise.
    """
    # str.startswith accepts a tuple, so one call covers all three prefixes.
    ignored_prefixes = ("!", "%", "#")
    return not any(
        stripped and not stripped.startswith(ignored_prefixes)
        for stripped in map(str.strip, source.splitlines())
    )
36+
37+
38+
def _strip_magic_lines(source: str) -> str:
    """Blank out Jupyter magic command lines in a cell's source.

    Lines whose first non-whitespace character is ``!`` or ``%`` are replaced
    by empty lines instead of being dropped, and existing blank lines are
    kept.  The result therefore has the same number of lines as the input, so
    line numbers in a ``SyntaxError`` raised while parsing the result point at
    the matching line of the notebook cell.

    Args:
        source: Full text of a notebook cell.

    Returns:
        The cell text with every magic line emptied, joined with ``"\\n"``.
    """
    magic_prefixes = ("!", "%")
    return "\n".join(
        "" if line.lstrip().startswith(magic_prefixes) else line
        for line in source.splitlines()
    )
48+
49+
50+
# ──────────────────────────────────────────────────────────────────────────────
51+
# File discovery helpers (module level for parametrize)
52+
# ──────────────────────────────────────────────────────────────────────────────
53+
def _get_marimo_files():
    """Collect marimo example scripts for test parametrization.

    Looks at each immediate subdirectory of ``EXAMPLE_DIR / "marimo"`` and
    gathers the ``*.py`` files directly inside it.  Subdirectories, and the
    files within each one, are visited in sorted order.

    Returns:
        A list of paths; empty when the marimo directory does not exist.
    """
    marimo_dir = EXAMPLE_DIR / "marimo"
    if not marimo_dir.exists():
        return []
    return [
        py_file
        for subdir in sorted(marimo_dir.iterdir())
        if subdir.is_dir()
        for py_file in sorted(subdir.glob("*.py"))
    ]
62+
63+
64+
def _get_notebook_files():
    """Collect Jupyter notebooks for test parametrization.

    Recursively searches ``EXAMPLE_DIR / "notebooks"`` and
    ``EXAMPLE_DIR / "mcp_example"`` (each only if it exists) for ``*.ipynb``
    files, in sorted order within each directory.  Files located under a
    ``.ipynb_checkpoints`` directory are skipped.

    Returns:
        A list of notebook paths; empty when neither directory exists.
    """
    search_roots = (EXAMPLE_DIR / "notebooks", EXAMPLE_DIR / "mcp_example")
    return [
        nb_file
        for root in search_roots
        if root.exists()
        for nb_file in sorted(root.rglob("*.ipynb"))
        # Compare path components rather than the path string, so only a
        # real ".ipynb_checkpoints" directory causes a file to be skipped.
        if ".ipynb_checkpoints" not in nb_file.parts
    ]
73+
74+
75+
def _get_yaml_files():
    """Return all ``*.yaml`` and ``*.yml`` files under EXAMPLE_DIR, sorted."""
    return sorted([*EXAMPLE_DIR.rglob("*.yaml"), *EXAMPLE_DIR.rglob("*.yml")])
78+
79+
80+
def _get_csv_files():
    """Return all ``*.csv`` files found recursively under EXAMPLE_DIR, sorted."""
    return sorted(EXAMPLE_DIR.rglob("*.csv"))
82+
83+
84+
def _get_excel_files():
    """Return all ``*.xlsx`` files found recursively under EXAMPLE_DIR, sorted."""
    return sorted(EXAMPLE_DIR.rglob("*.xlsx"))
86+
87+
88+
# Discovered once at import time: pytest.mark.parametrize needs concrete
# lists while the test classes below are being defined.
MARIMO_FILES = _get_marimo_files()
NOTEBOOK_FILES = _get_notebook_files()
YAML_FILES = _get_yaml_files()
CSV_FILES = _get_csv_files()
EXCEL_FILES = _get_excel_files()
93+
94+
95+
# ──────────────────────────────────────────────────────────────────────────────
96+
# Marimo Examples
97+
# ──────────────────────────────────────────────────────────────────────────────
98+
class TestMarimoExamples:
    """Test cases for marimo example applications."""

    @staticmethod
    def _is_cell_decorator(decorator: ast.expr) -> bool:
        """Return True for a decorator of the form ``@x.cell`` or ``@x.cell(...)``."""
        # A decorator written with arguments is an ast.Call wrapping the
        # attribute; unwrap it so both spellings are recognised.
        target = decorator.func if isinstance(decorator, ast.Call) else decorator
        return isinstance(target, ast.Attribute) and target.attr == "cell"

    @pytest.mark.skipif(not MARIMO_FILES, reason="No marimo files found")
    @pytest.mark.parametrize(
        "py_file",
        MARIMO_FILES,
        ids=lambda p: str(p.relative_to(EXAMPLE_DIR)),
    )
    def test_marimo_syntax(self, py_file: Path):
        """Test that marimo .py files have valid Python syntax."""
        source = py_file.read_text(encoding="utf-8")
        try:
            ast.parse(source)
        except SyntaxError as e:
            pytest.fail(f"Syntax error in {py_file.name}: {e}")

    @pytest.mark.skipif(not MARIMO_FILES, reason="No marimo files found")
    @pytest.mark.parametrize(
        "py_file",
        MARIMO_FILES,
        ids=lambda p: str(p.relative_to(EXAMPLE_DIR)),
    )
    def test_marimo_imports(self, py_file: Path):
        """Test that imports made by marimo cells can be resolved.

        Collects the ``import`` / ``from ... import`` statements that appear
        directly in the body of every function decorated as a cell and
        executes each statement on its own.  The test is skipped when no such
        statement exists and fails on the first ImportError.
        """
        source = py_file.read_text(encoding="utf-8")
        tree = ast.parse(source)

        import_statements = []
        for node in ast.walk(tree):
            if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                continue
            if not any(self._is_cell_decorator(d) for d in node.decorator_list):
                continue
            # Only statements directly in the cell body are taken, so imports
            # guarded by try/except or conditionals are not forced to succeed.
            import_statements.extend(
                ast.unparse(stmt)
                for stmt in node.body
                if isinstance(stmt, (ast.Import, ast.ImportFrom))
            )

        if not import_statements:
            pytest.skip("No import cells found")

        # Executing the whole decorated function would only evaluate its
        # decorator and define the function; the imports in its body would
        # never run.  Each import statement is therefore executed by itself.
        for stmt in import_statements:
            try:
                compiled = compile(stmt, str(py_file), "exec")
                exec(compiled, {"__file__": str(py_file)})
            except ImportError as e:  # also covers ModuleNotFoundError
                pytest.fail(f"Import error in {py_file.name}: {e}")
            except Exception:
                # Non-import errors (e.g., missing data files, UI calls) are acceptable
                pass
148+
149+
150+
# ──────────────────────────────────────────────────────────────────────────────
151+
# Jupyter Notebooks
152+
# ──────────────────────────────────────────────────────────────────────────────
153+
class TestNotebookExamples:
    """Test cases for Jupyter notebook examples."""

    @staticmethod
    def _python_cell_sources(nb: dict):
        """Yield ``(cell_index, python_source)`` for code cells holding Python.

        Non-code cells and cells that contain only magics, comments or
        whitespace are skipped.  Magic lines are removed from the yielded
        source via ``_strip_magic_lines``.  ``cell_index`` is the position of
        the cell in ``nb["cells"]``, counting every cell type.
        """
        for index, cell in enumerate(nb["cells"]):
            if cell["cell_type"] != "code":
                continue

            # "source" is either a single string or a list of line strings.
            raw = cell["source"]
            source = "".join(raw) if isinstance(raw, list) else raw
            if _is_magic_or_comment_only(source):
                continue

            clean_source = _strip_magic_lines(source)
            if clean_source.strip():
                yield index, clean_source

    @pytest.mark.skipif(not NOTEBOOK_FILES, reason="No notebook files found")
    @pytest.mark.parametrize(
        "nb_file",
        NOTEBOOK_FILES,
        ids=lambda p: str(p.relative_to(EXAMPLE_DIR)),
    )
    def test_notebook_json_validity(self, nb_file: Path):
        """Test that notebooks are valid JSON with the expected top-level keys.

        Checks that the root is a dict with ``cells`` (a list) and
        ``nbformat``, and that every cell has a known ``cell_type`` and a
        ``source`` entry.
        """
        content = nb_file.read_text(encoding="utf-8")
        try:
            nb = json.loads(content)
        except json.JSONDecodeError as e:
            pytest.fail(f"Invalid JSON in {nb_file.name}: {e}")

        assert isinstance(nb, dict), "Notebook root must be a dict"
        assert "cells" in nb, "Missing 'cells' key"
        assert "nbformat" in nb, "Missing 'nbformat' key"
        assert isinstance(nb["cells"], list), "'cells' must be a list"

        for i, cell in enumerate(nb["cells"]):
            assert "cell_type" in cell, f"Cell {i}: missing 'cell_type'"
            assert cell["cell_type"] in (
                "code",
                "markdown",
                "raw",
            ), f"Cell {i}: invalid cell_type"
            assert "source" in cell, f"Cell {i}: missing 'source'"

    @pytest.mark.skipif(not NOTEBOOK_FILES, reason="No notebook files found")
    @pytest.mark.parametrize(
        "nb_file",
        NOTEBOOK_FILES,
        ids=lambda p: str(p.relative_to(EXAMPLE_DIR)),
    )
    def test_notebook_cell_syntax(self, nb_file: Path):
        """Test that code cells have valid Python syntax.

        Every Python code cell is parsed with ``ast.parse`` after its magic
        lines are removed.  At most the first three errors are reported.
        """
        content = nb_file.read_text(encoding="utf-8")
        nb = json.loads(content)

        errors = []
        for i, clean_source in self._python_cell_sources(nb):
            try:
                ast.parse(clean_source)
            except SyntaxError as e:
                errors.append(f"Cell {i}: {e}")

        if errors:
            pytest.fail(
                f"Syntax errors in {nb_file.name}: {', '.join(errors[:3])}"
            )

    @pytest.mark.skipif(not NOTEBOOK_FILES, reason="No notebook files found")
    @pytest.mark.parametrize(
        "nb_file",
        NOTEBOOK_FILES,
        ids=lambda p: str(p.relative_to(EXAMPLE_DIR)),
    )
    def test_notebook_imports(self, nb_file: Path):
        """Test that import statements in notebooks can be resolved.

        Gathers every ``import`` / ``from ... import`` statement found
        anywhere in the Python code cells and executes each one in a fresh
        namespace.  The test is skipped when the notebook has no imports; at
        most the first three failures are reported.
        """
        content = nb_file.read_text(encoding="utf-8")
        nb = json.loads(content)

        import_statements = []
        for _, clean_source in self._python_cell_sources(nb):
            try:
                tree = ast.parse(clean_source)
            except SyntaxError:
                # Cells that do not parse are ignored by this test.
                continue
            import_statements.extend(
                ast.unparse(node)
                for node in ast.walk(tree)
                if isinstance(node, (ast.Import, ast.ImportFrom))
            )

        if not import_statements:
            pytest.skip("No import statements found")

        failed = []
        for stmt in import_statements:
            try:
                exec(compile(stmt, str(nb_file), "exec"), {})
            except (ImportError, ModuleNotFoundError) as e:
                failed.append(f"{stmt}: {e}")

        if failed:
            pytest.fail(
                f"Failed imports in {nb_file.name}: {', '.join(failed[:3])}"
            )
275+
276+
277+
# ──────────────────────────────────────────────────────────────────────────────
278+
# YAML Configs
279+
# ──────────────────────────────────────────────────────────────────────────────
280+
class TestYamlConfigs:
    """Test cases for YAML configuration files in examples."""

    @pytest.mark.skipif(not YAML_FILES, reason="No YAML files found")
    @pytest.mark.parametrize(
        "yaml_file",
        YAML_FILES,
        ids=lambda p: str(p.relative_to(EXAMPLE_DIR)),
    )
    def test_yaml_validity(self, yaml_file: Path):
        """Test that YAML config files parse correctly.

        Both single-document files and files with several ``---``-separated
        documents are accepted.
        """
        content = yaml_file.read_text(encoding="utf-8")
        try:
            # safe_load_all is lazy: iterate to the end so every document is
            # actually parsed inside the try block.
            for _ in yaml.safe_load_all(content):
                pass
        except yaml.YAMLError as e:
            pytest.fail(f"YAML error in {yaml_file.name}: {e}")
296+
297+
298+
# ──────────────────────────────────────────────────────────────────────────────
299+
# Data Files
300+
# ──────────────────────────────────────────────────────────────────────────────
301+
class TestDataFiles:
    """Test cases for CSV and Excel data files in examples."""

    @pytest.mark.skipif(not CSV_FILES, reason="No CSV files found")
    @pytest.mark.parametrize(
        "csv_file",
        CSV_FILES,
        ids=lambda p: str(p.relative_to(EXAMPLE_DIR)),
    )
    def test_csv_readable(self, csv_file: Path):
        """Test that CSV files can be read by pandas and have columns."""
        try:
            df = pd.read_csv(csv_file)
        except Exception as e:
            pytest.fail(f"Cannot read CSV {csv_file.name}: {e}")

        # Checked outside the try block so an empty table is reported with
        # its own message rather than as a read failure.
        assert len(df.columns) > 0, f"CSV {csv_file.name} has no columns"

    @pytest.mark.skipif(not EXCEL_FILES, reason="No Excel files found")
    @pytest.mark.parametrize(
        "excel_file",
        EXCEL_FILES,
        ids=lambda p: str(p.relative_to(EXAMPLE_DIR)),
    )
    def test_excel_readable(self, excel_file: Path):
        """Test that Excel files can be read by pandas and have columns."""
        try:
            df = pd.read_excel(excel_file)
        except Exception as e:
            pytest.fail(f"Cannot read Excel {excel_file.name}: {e}")

        # Checked outside the try block so an empty table is reported with
        # its own message rather than as a read failure.
        assert len(df.columns) > 0, f"Excel {excel_file.name} has no columns"
331+
332+
333+
if __name__ == "__main__":
    # Propagate pytest's exit code so running this file as a script reports
    # failures to the calling shell.
    raise SystemExit(pytest.main([__file__, "-v"]))

0 commit comments

Comments
 (0)