# pyproject.toml — packaging configuration for pdf-parse-bench
# Core package metadata (PEP 621).
[project]
name = "pdf-parse-bench"
version = "0.4.0"
description = "Benchmark suite for evaluating PDF parser quality on mathematical formulas and tables"
readme = "README.md"
requires-python = ">=3.12"
license = { file = "LICENSE" }
authors = [
    { name = "Pius Horn", email = "piushorn@me.com" },
]
keywords = [
    "pdf",
    "parser",
    "benchmark",
    "ocr",
    "evaluation",
    "mathematical-formulas",
    "tables",
]
# Trove classifiers; the listed Python versions match `requires-python` above.
classifiers = [
    "Development Status :: 3 - Alpha",
    "Intended Audience :: Developers",
    "Intended Audience :: Science/Research",
    "License :: OSI Approved :: MIT License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Topic :: Scientific/Engineering",
    "Topic :: Software Development :: Quality Assurance",
    "Topic :: Software Development :: Testing",
    "Topic :: Text Processing",
    "Operating System :: OS Independent",
]
# Runtime requirements (PEP 508 strings), grouped by purpose.
dependencies = [
    # Core
    "pydantic>=2.10",
    "pydantic-settings>=2.0.0",
    "pyyaml>=6.0.0",
    "python-dotenv>=1.0.0",
    "tqdm>=4.66.0",
    "rich>=14.1.0",
    "click>=8.0.0",

    # Evaluation & AI
    "openai>=2.17.0",
    "mistralai>=1.12.3",
    "requests>=2.32.0",
    "levenshtein>=0.27.1",

    # Unicode-to-LaTeX conversion used in formula rendering
    "pylatexenc>=2.10",
    "pillow>=10.0.0",
    "pymupdf>=1.24.0",

    # Synthetic PDF generation (creates test documents)
    # NOTE(review): this heading may not apply to every entry below
    # (e.g. anthropic, notebook) — confirm the grouping.
    "faker>=37.11.0",
    "duckdb>=1.4.1",
    "notebook>=7.5.0",
    "markdown>=3.10.1",
    "scipy>=1.17.0",
    "matplotlib>=3.10.8",
    "anthropic>=0.84.0",
]
# Project links; all three point at the GitHub repository phorn1/pdf-parse-bench.
[project.urls]
Homepage = "https://github.com/phorn1/pdf-parse-bench"
Repository = "https://github.com/phorn1/pdf-parse-bench"
Issues = "https://github.com/phorn1/pdf-parse-bench/issues"
# Build backend: Hatchling. The build requirement carries no version bound.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
# Wheel target: ship the package located at src/pdf_parse_bench.
[tool.hatch.build.targets.wheel]
packages = ["src/pdf_parse_bench"]
# Extra directories forced into the wheel.
# Left side: path in the project tree; right side: path inside the wheel.
[tool.hatch.build.targets.wheel.force-include]
"data/2026-q1-formulas-only" = "pdf_parse_bench/data/2026-q1-formulas-only"
"data/2026-q1-tables-only" = "pdf_parse_bench/data/2026-q1-tables-only"
# Source distribution target: include patterns select the /src and /data trees.
# /data is also the source of the wheel's force-include paths.
[tool.hatch.build.targets.sdist]
include = ["/src", "/data"]