Skip to content
1 change: 1 addition & 0 deletions changelog.d/18871.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Store the `LoggingContext` in a `ContextVar` instead of a thread-local variable.
93 changes: 9 additions & 84 deletions synapse/logging/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import threading
import typing
import warnings
from contextvars import ContextVar
from types import TracebackType
from typing import (
TYPE_CHECKING,
Expand Down Expand Up @@ -653,13 +654,12 @@ def __exit__(
)


_thread_local = threading.local()
_thread_local.current_context = SENTINEL_CONTEXT
_current_context: ContextVar[LoggingContextOrSentinel] = ContextVar("current_context")
Copy link
Copy Markdown
Contributor Author

@MadLittleMods MadLittleMods Sep 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Error: Called stop on logcontext POST-0 without recording a start rusage

There is a problem where the POST-0 LoggingContext is somehow becoming the current context without a corresponding set_current_context(POST-0) call.

See the lines marked red in the snippet below.

SYNAPSE_TEST_LOG_LEVEL=INFO poetry run trial tests.rest.client.test_rooms.RoomStateTestCase.test_get_state_event_cancellation

_trial_temp/test.log

  2025-09-02 19:12:06-0500 [-] synapse.http.site - 304 - INFO - sentinel - asdf SynapseRequest render
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 662 - INFO - sentinel - asdf PreserveLoggingContext(POST-0).__enter__ nonce=meZAG
+ 2025-09-02 19:12:06-0500 [-] synapse.logging.context - 723 - INFO - sentinel - asdf set_current_context(POST-0) (previous=sentinel)
+ 2025-09-02 19:12:06-0500 [-] synapse.logging.context - 454 - INFO - POST-0 - asdf LoggingContext(POST-0).start  usage_start=True
+ 2025-09-02 19:12:06-0500 [-] synapse.logging.context - 675 - INFO - POST-0 - asdf PreserveLoggingContext(POST-0).__exit__ nonce=meZAG restoring old_context=sentinel
+ 2025-09-02 19:12:06-0500 [-] synapse.logging.context - 723 - INFO - POST-0 - asdf set_current_context(sentinel) (previous=POST-0)
+ 2025-09-02 19:12:06-0500 [-] synapse.logging.context - 471 - INFO - POST-0 - asdf LoggingContext(POST-0).stop usage_start=True rusage=True
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 662 - INFO - sentinel - asdf PreserveLoggingContext(sentinel).__enter__ nonce=xQsLm
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 723 - INFO - sentinel - asdf set_current_context(sentinel) (previous=sentinel)
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 723 - INFO - sentinel - asdf set_current_context(_handle_new_device_update_async-0) (previous=sentinel)
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 454 - INFO - _handle_new_device_update_async-0 - asdf LoggingContext(_handle_new_device_update_async-0).start  usage_start=True
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 675 - INFO - sentinel - asdf PreserveLoggingContext(sentinel).__exit__ nonce=xQsLm restoring old_context=sentinel
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 723 - INFO - sentinel - asdf set_current_context(sentinel) (previous=sentinel)
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 723 - INFO - sentinel - asdf set_current_context(db-POST-0) (previous=sentinel)
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 454 - INFO - db-POST-0 - asdf LoggingContext(db-POST-0).start  usage_start=True
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 723 - INFO - db-POST-0 - asdf set_current_context(sentinel) (previous=db-POST-0)
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 471 - INFO - db-POST-0 - asdf LoggingContext(db-POST-0).stop usage_start=True rusage=True
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 723 - INFO - sentinel - asdf set_current_context(db-_handle_new_device_update_async-0) (previous=sentinel)
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 454 - INFO - db-_handle_new_device_update_async-0 - asdf LoggingContext(db-_handle_new_device_update_async-0).start  usage_start=True
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 723 - INFO - db-_handle_new_device_update_async-0 - asdf set_current_context(sentinel) (previous=db-_handle_new_device_update_async-0)
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 471 - INFO - db-_handle_new_device_update_async-0 - asdf LoggingContext(db-_handle_new_device_update_async-0).stop usage_start=True rusage=True
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 723 - INFO - sentinel - asdf set_current_context(db-POST-0) (previous=sentinel)
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 454 - INFO - db-POST-0 - asdf LoggingContext(db-POST-0).start  usage_start=True
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 723 - INFO - db-POST-0 - asdf set_current_context(sentinel) (previous=db-POST-0)
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 471 - INFO - db-POST-0 - asdf LoggingContext(db-POST-0).stop usage_start=True rusage=True
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 723 - INFO - sentinel - asdf set_current_context(db-_handle_new_device_update_async-0) (previous=sentinel)
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 454 - INFO - db-_handle_new_device_update_async-0 - asdf LoggingContext(db-_handle_new_device_update_async-0).start  usage_start=True
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 723 - INFO - db-_handle_new_device_update_async-0 - asdf set_current_context(sentinel) (previous=db-_handle_new_device_update_async-0)
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 471 - INFO - db-_handle_new_device_update_async-0 - asdf LoggingContext(db-_handle_new_device_update_async-0).stop usage_start=True rusage=True
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 723 - INFO - sentinel - asdf set_current_context(db-POST-0) (previous=sentinel)
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 454 - INFO - db-POST-0 - asdf LoggingContext(db-POST-0).start  usage_start=True
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 723 - INFO - db-POST-0 - asdf set_current_context(sentinel) (previous=db-POST-0)
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 471 - INFO - db-POST-0 - asdf LoggingContext(db-POST-0).stop usage_start=True rusage=True
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 723 - INFO - _handle_new_device_update_async-0 - asdf set_current_context(sentinel) (previous=_handle_new_device_update_async-0)
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 471 - INFO - _handle_new_device_update_async-0 - asdf LoggingContext(_handle_new_device_update_async-0).stop usage_start=True rusage=True
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 723 - INFO - sentinel - asdf set_current_context(db-POST-0) (previous=sentinel)
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 454 - INFO - db-POST-0 - asdf LoggingContext(db-POST-0).start  usage_start=True
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 723 - INFO - db-POST-0 - asdf set_current_context(sentinel) (previous=db-POST-0)
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 471 - INFO - db-POST-0 - asdf LoggingContext(db-POST-0).stop usage_start=True rusage=True
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 723 - INFO - sentinel - asdf set_current_context(db-POST-0) (previous=sentinel)
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 454 - INFO - db-POST-0 - asdf LoggingContext(db-POST-0).start  usage_start=True
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 723 - INFO - db-POST-0 - asdf set_current_context(sentinel) (previous=db-POST-0)
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 471 - INFO - db-POST-0 - asdf LoggingContext(db-POST-0).stop usage_start=True rusage=True
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 723 - INFO - sentinel - asdf set_current_context(db-POST-0) (previous=sentinel)
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 454 - INFO - db-POST-0 - asdf LoggingContext(db-POST-0).start  usage_start=True
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 723 - INFO - db-POST-0 - asdf set_current_context(sentinel) (previous=db-POST-0)
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 471 - INFO - db-POST-0 - asdf LoggingContext(db-POST-0).stop usage_start=True rusage=True
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 723 - INFO - sentinel - asdf set_current_context(db-POST-0) (previous=sentinel)
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 454 - INFO - db-POST-0 - asdf LoggingContext(db-POST-0).start  usage_start=True
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 723 - INFO - db-POST-0 - asdf set_current_context(sentinel) (previous=db-POST-0)
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 471 - INFO - db-POST-0 - asdf LoggingContext(db-POST-0).stop usage_start=True rusage=True
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 723 - INFO - sentinel - asdf set_current_context(db-POST-0) (previous=sentinel)
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 454 - INFO - db-POST-0 - asdf LoggingContext(db-POST-0).start  usage_start=True
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 723 - INFO - db-POST-0 - asdf set_current_context(sentinel) (previous=db-POST-0)
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 471 - INFO - db-POST-0 - asdf LoggingContext(db-POST-0).stop usage_start=True rusage=True
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 723 - INFO - sentinel - asdf set_current_context(db-POST-0) (previous=sentinel)
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 454 - INFO - db-POST-0 - asdf LoggingContext(db-POST-0).start  usage_start=True
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 723 - INFO - db-POST-0 - asdf set_current_context(sentinel) (previous=db-POST-0)
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 471 - INFO - db-POST-0 - asdf LoggingContext(db-POST-0).stop usage_start=True rusage=True
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 723 - INFO - sentinel - asdf set_current_context(db-POST-0) (previous=sentinel)
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 454 - INFO - db-POST-0 - asdf LoggingContext(db-POST-0).start  usage_start=True
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 723 - INFO - db-POST-0 - asdf set_current_context(sentinel) (previous=db-POST-0)
  2025-09-02 19:12:06-0500 [-] synapse.logging.context - 471 - INFO - db-POST-0 - asdf LoggingContext(db-POST-0).stop usage_start=True rusage=True
- 2025-09-02 19:12:06-0500 [-] synapse.logging.context - 662 - INFO - POST-0 - asdf PreserveLoggingContext(sentinel).__enter__ nonce=JvLjU
- 2025-09-02 19:12:06-0500 [-] synapse.logging.context - 723 - INFO - POST-0 - asdf set_current_context(sentinel) (previous=POST-0)
- 2025-09-02 19:12:06-0500 [-] synapse.logging.context - 471 - INFO - POST-0 - asdf LoggingContext(POST-0).stop usage_start=False rusage=True
- 2025-09-02 19:12:06-0500 [-] synapse.logging.context - 488 - ERROR - POST-0 - asdf Called stop on logcontext POST-0 without recording a start rusage

Copy link
Copy Markdown
Contributor Author

@MadLittleMods MadLittleMods Sep 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps this is a Twisted bug?

I was under the impression that Twisted supported ContextVar's but now I'm not sure. All of the issues mentioned in matrix-org/synapse#10342 are resolved but there are other things in the Twisted tracker:

Unresolved issues:

Resolved issues:

And we could even be running into something unreported 🤷 Need to investigate more.

Copy link
Copy Markdown
Contributor Author

@MadLittleMods MadLittleMods Sep 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From some more debugging, I think the ContextVar is acting normally.

And this may just be the case that the LoggingContext start/stop pattern isn't compatible with the ContextVar we're using now. We'd have to maintain the log context rules 🤔.

In this case, it's the SynapseRequest.logcontext where stop is called because we have a PreserveLoggingContext around SynapseRequest.render which only kicks off the render and doesn't wait for it to finish so we stop way before the request is done. And it's never re-started. So when other LoggingContext utilities are used in the downstream code to set_current_context, it will stop the already stopped SynapseRequest.logcontext.

I go back and forth on whether we can update things to work correctly. If I naively try to manage the lifetime myself by calling self.logcontext.__enter__ manually in SynapseRequest.render, it still doesn't work out.



def current_context() -> LoggingContextOrSentinel:
"""Get the current logging context from thread local storage"""
return getattr(_thread_local, "current_context", SENTINEL_CONTEXT)
return _current_context.get(SENTINEL_CONTEXT)


def set_current_context(context: LoggingContextOrSentinel) -> LoggingContextOrSentinel:
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO: The docstring needs to be updated

Expand All @@ -680,7 +680,7 @@ def set_current_context(context: LoggingContextOrSentinel) -> LoggingContextOrSe
if current is not context:
rusage = get_thread_resource_usage()
current.stop(rusage)
_thread_local.current_context = context
_current_context.set(context)
context.start(rusage)

return current
Expand Down Expand Up @@ -796,7 +796,6 @@ def run_in_background(
CRITICAL error about an unhandled error will be logged without much
indication about where it came from.
"""
current = current_context()
try:
res = f(*args, **kwargs)
except Exception:
Expand Down Expand Up @@ -825,23 +824,6 @@ def run_in_background(
# optimise out the messing about
return d

# The function may have reset the context before returning, so
# we need to restore it now.
ctx = set_current_context(current)

# The original context will be restored when the deferred
# completes, but there is nothing waiting for it, so it will
# get leaked into the reactor or some other function which
# wasn't expecting it. We therefore need to reset the context
# here.
#
# (If this feels asymmetric, consider it this way: we are
# effectively forking a new thread of execution. We are
# probably currently within a ``with LoggingContext()`` block,
# which is supposed to have a single entry and exit point. But
# by spawning off another deferred, we are effectively
# adding a new exit point.)
d.addBoth(_set_context_cb, ctx)
return d


Expand All @@ -861,65 +843,20 @@ def run_coroutine_in_background(
cannot change the log contexts.
"""

current = current_context()
d = defer.ensureDeferred(coroutine)

# The function may have reset the context before returning, so
# we need to restore it now.
ctx = set_current_context(current)

# The original context will be restored when the deferred
# completes, but there is nothing waiting for it, so it will
# get leaked into the reactor or some other function which
# wasn't expecting it. We therefore need to reset the context
# here.
#
# (If this feels asymmetric, consider it this way: we are
# effectively forking a new thread of execution. We are
# probably currently within a ``with LoggingContext()`` block,
# which is supposed to have a single entry and exit point. But
# by spawning off another deferred, we are effectively
# adding a new exit point.)
d.addBoth(_set_context_cb, ctx)
return d
return defer.ensureDeferred(coroutine)


T = TypeVar("T")


# TODO: This function is a no-op now and should be removed in a follow-up PR.
def make_deferred_yieldable(deferred: "defer.Deferred[T]") -> "defer.Deferred[T]":
Comment on lines +852 to 853
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

make_deferred_yieldable no longer does anything (no-op) but there a lot of references to clean-up. I think it would be better to do this in a follow-up PR than bulk up with this diff with changes that will cloud the main change we're trying to introduce.

"""Given a deferred, make it follow the Synapse logcontext rules:

If the deferred has completed, essentially does nothing (just returns another
completed deferred with the result/failure).

If the deferred has not yet completed, resets the logcontext before
returning a deferred. Then, when the deferred completes, restores the
current logcontext before running callbacks/errbacks.

(This is more-or-less the opposite operation to run_in_background.)
"""
if deferred.called and not deferred.paused:
# it looks like this deferred is ready to run any callbacks we give it
# immediately. We may as well optimise out the logcontext faffery.
return deferred

# ok, we can't be sure that a yield won't block, so let's reset the
# logcontext, and add a callback to the deferred to restore it.
prev_context = set_current_context(SENTINEL_CONTEXT)
deferred.addBoth(_set_context_cb, prev_context)
return deferred


ResultT = TypeVar("ResultT")


def _set_context_cb(result: ResultT, context: LoggingContextOrSentinel) -> ResultT:
"""A callback function which just sets the logging context"""
set_current_context(context)
return result


def defer_to_thread(
reactor: "ISynapseReactor", f: Callable[P, R], *args: P.args, **kwargs: P.kwargs
) -> "defer.Deferred[R]":
Expand Down Expand Up @@ -991,18 +928,6 @@ def defer_to_threadpool(
A Deferred which fires a callback with the result of `f`, or an
errback if `f` throws an exception.
"""
curr_context = current_context()
if not curr_context:
logger.warning(
"Calling defer_to_threadpool from sentinel context: metrics will be lost"
)
parent_context = None
else:
assert isinstance(curr_context, LoggingContext)
parent_context = curr_context

def g() -> R:
with LoggingContext(str(curr_context), parent_context=parent_context):
return f(*args, **kwargs)

return make_deferred_yieldable(threads.deferToThreadPool(reactor, threadpool, g))
return make_deferred_yieldable(
threads.deferToThreadPool(reactor, threadpool, f, *args, **kwargs)
)
Loading