Skip to content

Commit cb3c71d

Browse files
dan-blanchard and claude committed
feat: add --threads passthrough to compare_detectors.py
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent d168ef0 commit cb3c71d

1 file changed

Lines changed: 48 additions & 5 deletions

File tree

scripts/compare_detectors.py

Lines changed: 48 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -93,14 +93,20 @@ def _cache_filename( # noqa: PLR0913
9393
python_tag: str,
9494
build_tag: str,
9595
kind: str,
96+
*,
97+
threads: int = 1,
9698
) -> str:
9799
"""Build a cache filename like ``chardet_7.0.1_a1b2c3_cpython3.11_mypyc_time.json``.
98100
101+
When *threads* > 1, a ``{N}threads`` segment is inserted before *kind*:
102+
``chardet_7.0.1_a1b2c3_cpython3.11_mypyc_4threads_time.json``.
103+
99104
*detector_name* should be the package name (e.g. ``"chardet"``,
100105
``"charset-normalizer"``), **not** the display label.
101106
"""
102107
safe_name = detector_name.replace(" ", "-").replace("/", "-")
103-
return f"{safe_name}_{detector_version}_{benchmark_hash}_{python_tag}_{build_tag}_{kind}.json"
108+
threads_seg = f"_{threads}threads" if threads > 1 else ""
109+
return f"{safe_name}_{detector_version}_{benchmark_hash}_{python_tag}_{build_tag}{threads_seg}_{kind}.json"
104110

105111

106112
def _load_cached(cache_dir: Path, filename: str) -> dict | None:
@@ -329,12 +335,19 @@ def _has_full_cache( # noqa: PLR0913
329335
build_tag: str,
330336
*,
331337
skip_memory: bool = False,
338+
threads: int = 1,
332339
) -> bool:
333340
"""Return ``True`` if all required cache files exist."""
334341
kinds = ("time",) if skip_memory else ("time", "memory")
335342
for kind in kinds:
336343
fname = _cache_filename(
337-
detector_type, version, benchmark_hash, python_tag, build_tag, kind
344+
detector_type,
345+
version,
346+
benchmark_hash,
347+
python_tag,
348+
build_tag,
349+
kind,
350+
threads=threads,
338351
)
339352
if not (cache_dir / fname).is_file():
340353
return False
@@ -406,13 +419,14 @@ def _cleanup_venv(venv_dir: Path) -> None:
406419
# ---------------------------------------------------------------------------
407420

408421

409-
def _run_timing_subprocess(
422+
def _run_timing_subprocess( # noqa: PLR0913
410423
python_executable: str,
411424
data_dir: str,
412425
*,
413426
detector_type: str = "chardet",
414427
encoding_era: str = "all",
415428
pure: bool = False,
429+
threads: int = 1,
416430
) -> _TimingResult:
417431
"""Run detection timing in an isolated subprocess via ``benchmark_time.py``.
418432
@@ -449,6 +463,8 @@ def _run_timing_subprocess(
449463
cmd.extend(["--encoding-era", encoding_era])
450464
if pure:
451465
cmd.append("--pure")
466+
if threads > 1:
467+
cmd.extend(["--threads", str(threads)])
452468

453469
result = subprocess.run(cmd, capture_output=True, text=True, check=False)
454470
if result.returncode != 0:
@@ -498,6 +514,7 @@ def _run_timing_with_median( # noqa: PLR0913
498514
encoding_era: str = "all",
499515
pure: bool = False,
500516
num_runs: int = 3,
517+
threads: int = 1,
501518
) -> _TimingResult:
502519
"""Run timing ``num_runs`` times and return median-aggregated results.
503520
@@ -518,6 +535,7 @@ def _run_timing_with_median( # noqa: PLR0913
518535
detector_type=detector_type,
519536
encoding_era=encoding_era,
520537
pure=pure,
538+
threads=threads,
521539
)
522540
if i == 0:
523541
first_results = run.results
@@ -654,6 +672,7 @@ def run_comparison( # noqa: PLR0913
654672
use_cache: bool = True,
655673
benchmark_hash: str = "",
656674
no_memory: bool = False,
675+
threads: int = 1,
657676
) -> None:
658677
"""Run accuracy and performance comparison across detectors.
659678
@@ -695,6 +714,8 @@ def run_comparison( # noqa: PLR0913
695714

696715
print(f"Found {len(test_files)} test files")
697716
print(f"Detectors: {', '.join(detector_labels)}")
717+
if threads > 1:
718+
print(f"Threads: {threads}")
698719
print()
699720
print("Equivalences used:")
700721
print(" Superset relationships (detected superset of expected is correct):")
@@ -746,7 +767,13 @@ def _run_timing_for_detector(
746767
# Check cache
747768
if cache_dir is not None:
748769
fname = _cache_filename(
749-
detector_type, version, benchmark_hash, py_tag, b_tag, "time"
770+
detector_type,
771+
version,
772+
benchmark_hash,
773+
py_tag,
774+
b_tag,
775+
"time",
776+
threads=threads,
750777
)
751778
cached = _load_cached(cache_dir, fname)
752779
if cached is not None:
@@ -777,12 +804,19 @@ def _run_timing_for_detector(
777804
encoding_era=era,
778805
pure=is_pure,
779806
num_runs=num_runs,
807+
threads=threads,
780808
)
781809

782810
# Save to cache
783811
if cache_dir is not None:
784812
fname = _cache_filename(
785-
detector_type, version, benchmark_hash, py_tag, b_tag, "time"
813+
detector_type,
814+
version,
815+
benchmark_hash,
816+
py_tag,
817+
b_tag,
818+
"time",
819+
threads=threads,
786820
)
787821
_save_cache(
788822
cache_dir,
@@ -1203,6 +1237,13 @@ def _run_timing_for_detector(
12031237
" (sets HATCH_BUILD_HOOK_ENABLE_MYPYC=true)"
12041238
),
12051239
)
1240+
parser.add_argument(
1241+
"--threads",
1242+
type=int,
1243+
default=1,
1244+
metavar="N",
1245+
help="Number of detection threads for benchmark_time.py (default: 1)",
1246+
)
12061247
args = parser.parse_args()
12071248

12081249
if args.pure and args.mypyc:
@@ -1331,6 +1372,7 @@ def _run_timing_for_detector(
13311372
python_tags[label],
13321373
build_tags[label],
13331374
skip_memory=args.no_memory,
1375+
threads=args.threads,
13341376
):
13351377
print(f" {label}: full cache hit, skipping venv creation")
13361378
else:
@@ -1434,6 +1476,7 @@ def _create_venv_from_spec(
14341476
use_cache=use_cache,
14351477
benchmark_hash=benchmark_hash,
14361478
no_memory=args.no_memory,
1479+
threads=args.threads,
14371480
)
14381481
finally:
14391482
for label, (venv_dir, _) in venvs.items():

0 commit comments

Comments
 (0)