test(dialect): update assertions for new honest-stats API

mvalentsev · mvalentsev · commit 79e16471708e · 2026-04-08T02:24:53.000+05:00
PR #147 renamed the compression_stats fields (ratio -> size_ratio, compressed_chars -> summary_chars) and switched count_tokens to a word-based heuristic, but the test_dialect tests from PR #131 still assert the old API and fail on main. Bring TestCompressionStats.test_stats in line with the current dict keys (size_ratio, summary_chars) and add an assertion that original_tokens_est exceeds summary_tokens_est. Update test_count_tokens to match the word-based formula, max(1, int(words * 1.3)), with extra coverage for the empty-string and single-word edge cases around the max(1, ...) floor. This unblocks CI on main, which currently fails on these two tests.
diff --git a/tests/test_dialect.py b/tests/test_dialect.py
@@ -109,11 +109,16 @@ def test_stats(self):
         original = "We decided to use GraphQL instead of REST. " * 10
         compressed = d.compress(original)
         stats = d.compression_stats(original, compressed)
-        assert stats["ratio"] > 1
-        assert stats["original_chars"] > stats["compressed_chars"]
+        assert stats["size_ratio"] > 1
+        assert stats["original_chars"] > stats["summary_chars"]
+        assert stats["original_tokens_est"] > stats["summary_tokens_est"]
 
     def test_count_tokens(self):
-        assert Dialect.count_tokens("hello world") == len("hello world") // 3
+        # count_tokens uses a word-based heuristic (~1.3 tokens per word).
+        # "hello world" is 2 words -> max(1, int(2 * 1.3)) == 2.
+        assert Dialect.count_tokens("hello world") == 2
+        assert Dialect.count_tokens("") == 1
+        assert Dialect.count_tokens("one") == 1
 
 
 class TestZettelEncoding: