Skip to content

Commit 5b875e4

Browse files
authored
Implement IDNA encode/decode caching (#476)
1 parent acc934e commit 5b875e4

File tree

8 files changed

+120
-9
lines changed

8 files changed

+120
-9
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,3 +104,4 @@ yarl/_quoting.html
104104
.install-cython
105105
.install-deps
106106
.pytest_cache
107+
pip-wheel-metadata

CHANGES/476.feature

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Cache slow IDNA encode/decode calls.

docs/api.rst

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -793,6 +793,39 @@ Default port substitution
793793
False
794794

795795

796+
Cache control
797+
-------------
798+
799+
IDNA conversion used for host encoding is quite an expensive operation; that's why the
800+
``yarl`` library caches IDNA encoding/decoding calls by storing last ``256`` encodes
801+
and last ``256`` decodes in the global LRU cache.
802+
803+
.. function:: cache_clear()
804+
805+
Clear IDNA caches.
806+
807+
808+
.. function:: cache_info()
809+
810+
Return a dictionary with ``"idna_encode"`` and ``"idna_decode"`` keys, each value
811+
points to the corresponding ``CacheInfo`` structure (see :func:`functools.lru_cache` for
812+
details):
813+
814+
.. doctest::
815+
:options: +SKIP
816+
817+
>>> yarl.cache_info()
818+
{'idna_encode': CacheInfo(hits=5, misses=5, maxsize=256, currsize=5),
819+
'idna_decode': CacheInfo(hits=24, misses=15, maxsize=256, currsize=15)}
820+
821+
822+
.. function:: cache_configure(*, idna_encode_size=256, idna_decode_size=256)
823+
824+
Set IDNA encode and decode cache sizes (``256`` for each by default).
825+
826+
Pass ``None`` to make the corresponding cache unbounded (may speed up the IDNA
827+
encoding/decoding operation a little but the memory footprint can be very high,
828+
please use with caution).
796829

797830
References
798831
----------

requirements/test.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@ pytest-cov>=2.3.1
22
pytest==5.4.3
33
multidict==4.7.6
44
idna==2.10
5+
typing_extensions==3.7.4.2
56
-e .

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
except IndexError:
3131
raise RuntimeError("Unable to determine version.")
3232

33-
install_requires = ["multidict>=4.0", "idna>=2.0"]
33+
install_requires = ["multidict>=4.0", "idna>=2.0", "typing_extensions>=3.7.4"]
3434

3535

3636
def read(name):

tests/test_cache.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import yarl
2+
3+
# Don't check the actual behavior but make sure that calls are allowed
4+
5+
6+
def teardown_module():
    """Restore the default cache sizes once this module's tests have run."""
    yarl.cache_configure()


def test_cache_clear() -> None:
    """``cache_clear()`` can be called without arguments."""
    yarl.cache_clear()


def test_cache_info() -> None:
    """``cache_info()`` reports exactly the encode and decode caches."""
    stats = yarl.cache_info()
    assert set(stats) == {"idna_encode", "idna_decode"}


def test_cache_configure_default() -> None:
    """``cache_configure()`` accepts being called with its defaults."""
    yarl.cache_configure()


def test_cache_configure_None() -> None:
    """``None`` is accepted as a size for both caches."""
    yarl.cache_configure(idna_encode_size=None, idna_decode_size=None)


def test_cache_configure_explicit() -> None:
    """Explicit integer sizes are accepted for both caches."""
    yarl.cache_configure(idna_encode_size=128, idna_decode_size=128)

yarl/__init__.py

Lines changed: 43 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import functools
12
import sys
23
import warnings
34
from collections.abc import Mapping, Sequence
@@ -453,10 +454,7 @@ def host(self):
453454
# fe80::2%Проверка
454455
# presence of '%' sign means only IPv6 address, so idna is useless.
455456
return raw
456-
try:
457-
return idna.decode(raw.encode("ascii"))
458-
except UnicodeError: # e.g. '::1'
459-
return raw.encode("ascii").decode("idna")
457+
return _idna_decode(raw)
460458

461459
@property
462460
def port(self):
@@ -671,12 +669,11 @@ def _encode_host(cls, host):
671669
except ValueError:
672670
# IDNA encoding is slow,
673671
# skip it for ASCII-only strings
672+
# Don't move the check into _idna_encode() helper
673+
# to reduce the cache size
674674
if host.isascii():
675675
return host
676-
try:
677-
host = idna.encode(host, uts46=True).decode("ascii")
678-
except UnicodeError:
679-
host = host.encode("idna").decode("ascii")
676+
return _idna_encode(host)
680677
else:
681678
host = ip.compressed
682679
if sep:
@@ -1029,3 +1026,41 @@ def human_repr(self):
10291026
self.fragment,
10301027
)
10311028
)
1029+
1030+
1031+
_MAXCACHE = 256
1032+
1033+
1034+
@functools.lru_cache(maxsize=_MAXCACHE)
def _idna_decode(raw):
    """Decode an ASCII (IDNA-encoded) host string, memoizing the result.

    ``idna.decode`` is tried first; if it raises ``UnicodeError`` the
    built-in ``"idna"`` codec is used instead.
    """
    try:
        decoded = idna.decode(raw.encode("ascii"))
    except UnicodeError:
        # e.g. '::1'
        decoded = raw.encode("ascii").decode("idna")
    return decoded
1040+
1041+
1042+
@functools.lru_cache(maxsize=_MAXCACHE)
def _idna_encode(host):
    """Encode a host string to its ASCII IDNA form, memoizing the result.

    ``idna.encode`` (with ``uts46=True``) is tried first; if it raises
    ``UnicodeError`` the built-in ``"idna"`` codec is used instead.
    """
    try:
        encoded = idna.encode(host, uts46=True).decode("ascii")
    except UnicodeError:
        encoded = host.encode("idna").decode("ascii")
    return encoded
1048+
1049+
1050+
def cache_clear():
    """Clear IDNA caches."""
    # Same order as before: decode cache first, then encode cache.
    for cached_func in (_idna_decode, _idna_encode):
        cached_func.cache_clear()
1053+
1054+
1055+
def cache_info():
    """Report statistics for the IDNA caches.

    Returns a dict with ``"idna_encode"`` and ``"idna_decode"`` keys whose
    values are the ``cache_info()`` results of the respective cached helpers.
    """
    encode_stats = _idna_encode.cache_info()
    decode_stats = _idna_decode.cache_info()
    return {"idna_encode": encode_stats, "idna_decode": decode_stats}
1060+
1061+
1062+
def cache_configure(*, idna_encode_size=_MAXCACHE, idna_decode_size=_MAXCACHE):
    """Set the sizes of the IDNA encode and decode LRU caches.

    Each cached helper is re-wrapped with a fresh ``functools.lru_cache`` of
    the requested size, so entries held by the previous wrappers are not
    carried over.

    :param idna_encode_size: ``maxsize`` for the encode cache; ``None`` makes
        the cache unbounded.
    :param idna_decode_size: ``maxsize`` for the decode cache; ``None`` makes
        the cache unbounded.
    :raises TypeError: propagated from ``functools.lru_cache`` when a size is
        not accepted; in that case neither cache is replaced.
    """
    global _idna_decode, _idna_encode

    # Build both replacement wrappers before rebinding either global: if the
    # second size is rejected, the first cache must not already be swapped,
    # otherwise a failed call would leave the caches half reconfigured.
    new_encode = functools.lru_cache(idna_encode_size)(_idna_encode.__wrapped__)
    new_decode = functools.lru_cache(idna_decode_size)(_idna_decode.__wrapped__)
    _idna_encode, _idna_decode = new_encode, new_decode

yarl/__init__.pyi

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from typing import overload, Any, Tuple, Optional, Mapping, Union, Sequence, Type
2+
from typing_extensions import TypedDict
23
import multidict
4+
from functools import _CacheInfo
35

46
_QueryVariable = Union[str, int]
57
_Query = Union[
@@ -87,3 +89,13 @@ class cached_property:
8789
def __init__(self, wrapped: Any) -> None: ...
8890
def __get__(self, inst: URL, owner: Type[URL]) -> Any: ...
8991
def __set__(self, inst: URL, value: Any) -> None: ...
92+
93+
class CacheInfo(TypedDict):
    """Shape of the mapping returned by ``cache_info()``."""

    # functools.lru_cache statistics for the IDNA encode cache.
    idna_encode: _CacheInfo
    # functools.lru_cache statistics for the IDNA decode cache.
    idna_decode: _CacheInfo
96+
97+
def cache_clear() -> None: ...
98+
def cache_info() -> CacheInfo: ...
99+
def cache_configure(
100+
*, idna_encode_size: Optional[int] = ..., idna_decode_size: Optional[int] = ...
101+
) -> None: ...

0 commit comments

Comments
 (0)