Skip to content

Commit 9881640

Browse files
committed
pythongh-142183: Cache multiple data stack chunks
1 parent 9a57179 commit 9881640

4 files changed

Lines changed: 114 additions & 15 deletions

File tree

Lib/test/test_capi/test_misc.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2665,6 +2665,35 @@ class Test_testinternalcapi(unittest.TestCase):
26652665
locals().update(get_test_funcs(_testinternalcapi,
26662666
exclude_prefix='test_lock_'))
26672667

2668+
@support.skip_emscripten_stack_overflow()
@support.skip_wasi_stack_overflow()
def test_datastack_caches_multiple_chunks(self):
    # The script below is handed to assert_python_ok() via "-c".
    # It sizes a recursion from the frame size of recurse() and an assumed
    # 16 KiB chunk size so that the frames span roughly twelve chunks, runs
    # the recursion once, and then checks the cache statistics reported by
    # _testinternalcapi.get_datastack_cache_stats(): at least two chunks
    # must be cached and their combined size must not exceed 8 * 16 KiB.
    script = textwrap.dedent("""
        import struct
        import sys
        import _testinternalcapi

        def recurse(n):
            _a=_b=_c=_d=_e=_f=_g=_h=_i=_j=None
            _k=_l=_m=_n=_o=_p=_q=_r=_s=_t=None
            if n:
                recurse(n - 1)

        words_per_chunk = 16 * 1024 // struct.calcsize("P")
        frame_words = _testinternalcapi.get_co_framesize(recurse.__code__)
        depth = max(64, 12 * words_per_chunk // frame_words + 32)
        sys.setrecursionlimit(depth + 100)

        recurse(depth)

        count, total_size = _testinternalcapi.get_datastack_cache_stats()
        assert count >= 2, (count, total_size, frame_words, depth)
        assert total_size <= 8 * 16 * 1024, (
            count, total_size, frame_words, depth
        )
    """)
    assert_python_ok("-c", script)
2696+
26682697

26692698
@threading_helper.requires_working_threading()
26702699
class Test_PyLock(unittest.TestCase):
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Cache multiple popped Python stack chunks per thread, within a small fixed
2+
memory budget, to avoid allocator thrashing when repeatedly crossing more than
3+
one stack chunk boundary.

Modules/_testinternalcapi.c

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1337,6 +1337,28 @@ get_co_framesize(PyObject *self, PyObject *arg)
13371337
return PyLong_FromLong(code->co_framesize);
13381338
}
13391339

1340+
/* Report the current thread's cache of spare data stack chunks.
 *
 * Follows the `previous` links starting at tstate->datastack_cached_chunk
 * and returns a 2-tuple (count, total_size): the number of chunks on that
 * list and the sum of their `size` fields.
 * Returns NULL if the size object cannot be created or the tuple cannot be
 * built.
 */
static PyObject *
get_datastack_cache_stats(PyObject *self, PyObject *Py_UNUSED(ignored))
{
    PyThreadState *tstate = _PyThreadState_GET();
    Py_ssize_t n_chunks = 0;
    size_t n_bytes = 0;

    _PyStackChunk *cur = tstate->datastack_cached_chunk;
    while (cur != NULL) {
        n_bytes += cur->size;
        n_chunks++;
        cur = cur->previous;
    }

    PyObject *bytes_obj = PyLong_FromSize_t(n_bytes);
    if (bytes_obj == NULL) {
        return NULL;
    }
    // The "N" format unit takes over our reference to bytes_obj.
    return Py_BuildValue("nN", n_chunks, bytes_obj);
}
}
1361+
13401362
static PyObject *
13411363
get_co_localskinds(PyObject *self, PyObject *arg)
13421364
{
@@ -2938,6 +2960,7 @@ static PyMethodDef module_functions[] = {
29382960
{"iframe_getlasti", iframe_getlasti, METH_O, NULL},
29392961
{"code_returns_only_none", code_returns_only_none, METH_O, NULL},
29402962
{"get_co_framesize", get_co_framesize, METH_O, NULL},
2963+
{"get_datastack_cache_stats", get_datastack_cache_stats, METH_NOARGS, NULL},
29412964
{"get_co_localskinds", get_co_localskinds, METH_O, NULL},
29422965
{"get_code_var_counts", _PyCFunction_CAST(get_code_var_counts),
29432966
METH_VARARGS | METH_KEYWORDS, NULL},

Python/pystate.c

Lines changed: 59 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1726,10 +1726,12 @@ clear_datastack(PyThreadState *tstate)
17261726
_PyObject_VirtualFree(chunk, chunk->size);
17271727
chunk = prev;
17281728
}
1729-
if (tstate->datastack_cached_chunk != NULL) {
1730-
_PyObject_VirtualFree(tstate->datastack_cached_chunk,
1731-
tstate->datastack_cached_chunk->size);
1732-
tstate->datastack_cached_chunk = NULL;
1729+
chunk = tstate->datastack_cached_chunk;
1730+
tstate->datastack_cached_chunk = NULL;
1731+
while (chunk != NULL) {
1732+
_PyStackChunk *prev = chunk->previous;
1733+
_PyObject_VirtualFree(chunk, chunk->size);
1734+
chunk = prev;
17331735
}
17341736
}
17351737

@@ -3077,6 +3079,56 @@ _PyInterpreterState_HasFeature(PyInterpreterState *interp, unsigned long feature
30773079

30783080

30793081
#define MINIMUM_OVERHEAD 1000
3082+
#define DATA_STACK_CACHE_MAX_SIZE (8 * _PY_DATA_STACK_CHUNK_SIZE)
3083+
3084+
static _PyStackChunk *
3085+
pop_cached_datastack_chunk(PyThreadState *tstate, int allocate_size)
3086+
{
3087+
_PyStackChunk **best_link = NULL;
3088+
size_t best_size = (size_t)-1;
3089+
3090+
for (_PyStackChunk **link = &tstate->datastack_cached_chunk;
3091+
*link != NULL;
3092+
link = &(*link)->previous)
3093+
{
3094+
_PyStackChunk *chunk = *link;
3095+
if ((size_t)allocate_size <= chunk->size && chunk->size < best_size) {
3096+
best_link = link;
3097+
best_size = chunk->size;
3098+
}
3099+
}
3100+
if (best_link == NULL) {
3101+
return NULL;
3102+
}
3103+
3104+
_PyStackChunk *chunk = *best_link;
3105+
*best_link = chunk->previous;
3106+
chunk->previous = NULL;
3107+
chunk->top = 0;
3108+
return chunk;
3109+
}
3110+
3111+
static void
3112+
cache_or_free_datastack_chunk(PyThreadState *tstate, _PyStackChunk *chunk)
3113+
{
3114+
assert(chunk->previous == NULL);
3115+
3116+
size_t cached_size = chunk->size;
3117+
for (_PyStackChunk *cached = tstate->datastack_cached_chunk;
3118+
cached != NULL;
3119+
cached = cached->previous)
3120+
{
3121+
cached_size += cached->size;
3122+
if (cached_size > DATA_STACK_CACHE_MAX_SIZE) {
3123+
_PyObject_VirtualFree(chunk, chunk->size);
3124+
return;
3125+
}
3126+
}
3127+
3128+
chunk->top = 0;
3129+
chunk->previous = tstate->datastack_cached_chunk;
3130+
tstate->datastack_cached_chunk = chunk;
3131+
}
30803132

30813133
static PyObject **
30823134
push_chunk(PyThreadState *tstate, int size)
@@ -3086,13 +3138,9 @@ push_chunk(PyThreadState *tstate, int size)
30863138
allocate_size *= 2;
30873139
}
30883140
_PyStackChunk *new;
3089-
if (tstate->datastack_cached_chunk != NULL
3090-
&& (size_t)allocate_size <= tstate->datastack_cached_chunk->size)
3091-
{
3092-
new = tstate->datastack_cached_chunk;
3093-
tstate->datastack_cached_chunk = NULL;
3141+
new = pop_cached_datastack_chunk(tstate, allocate_size);
3142+
if (new != NULL) {
30943143
new->previous = tstate->datastack_chunk;
3095-
new->top = 0;
30963144
}
30973145
else {
30983146
new = allocate_chunk(allocate_size, tstate->datastack_chunk);
@@ -3134,17 +3182,13 @@ _PyThreadState_PopFrame(PyThreadState *tstate, _PyInterpreterFrame * frame)
31343182
if (base == &tstate->datastack_chunk->data[0]) {
31353183
_PyStackChunk *chunk = tstate->datastack_chunk;
31363184
_PyStackChunk *previous = chunk->previous;
3137-
_PyStackChunk *cached = tstate->datastack_cached_chunk;
31383185
// push_chunk ensures that the root chunk is never popped:
31393186
assert(previous);
31403187
tstate->datastack_top = &previous->data[previous->top];
31413188
tstate->datastack_chunk = previous;
31423189
tstate->datastack_limit = (PyObject **)(((char *)previous) + previous->size);
31433190
chunk->previous = NULL;
3144-
if (cached != NULL) {
3145-
_PyObject_VirtualFree(cached, cached->size);
3146-
}
3147-
tstate->datastack_cached_chunk = chunk;
3191+
cache_or_free_datastack_chunk(tstate, chunk);
31483192
}
31493193
else {
31503194
assert(tstate->datastack_top);

0 commit comments

Comments
 (0)