|
20 | 20 |
|
21 | 21 | import logging |
22 | 22 | import threading |
23 | | -from contextlib import nullcontext |
| 23 | +from contextlib import contextmanager, nullcontext |
24 | 24 | from functools import wraps |
25 | 25 | from types import TracebackType |
26 | 26 | from typing import ( |
27 | 27 | TYPE_CHECKING, |
28 | 28 | Any, |
29 | 29 | Awaitable, |
30 | 30 | Callable, |
| 31 | + ContextManager, |
31 | 32 | Dict, |
| 33 | + Generator, |
32 | 34 | Iterable, |
33 | 35 | Optional, |
34 | 36 | Protocol, |
|
49 | 51 | LoggingContext, |
50 | 52 | PreserveLoggingContext, |
51 | 53 | ) |
52 | | -from synapse.logging.opentracing import SynapseTags, start_active_span |
| 54 | +from synapse.logging.opentracing import ( |
| 55 | + SynapseTags, |
| 56 | + active_span, |
| 57 | + start_active_span, |
| 58 | + start_active_span_follows_from, |
| 59 | +) |
53 | 60 | from synapse.metrics import SERVER_NAME_LABEL |
54 | 61 | from synapse.metrics._types import Collector |
55 | 62 |
|
@@ -264,15 +271,97 @@ async def run() -> Optional[R]: |
264 | 271 |
|
265 | 272 | with BackgroundProcessLoggingContext( |
266 | 273 | name=desc, server_name=server_name, instance_id=count |
267 | | - ) as context: |
| 274 | + ) as logging_context: |
268 | 275 | try: |
269 | 276 | if bg_start_span: |
270 | | - ctx = start_active_span( |
271 | | - f"bgproc.{desc}", tags={SynapseTags.REQUEST_ID: str(context)} |
272 | | - ) |
| 277 | + original_active_tracing_span = active_span() |
| 278 | + |
| 279 | + # If there is already an active span (e.g. because this background |
| 280 | + # process was started as part of handling a request for example), |
| 281 | + # because this is a long-running background task that may serve a |
| 282 | + # broader purpose than the request that kicked it off, we don't want |
| 283 | + # it to be a direct child of the currently active trace connected to |
| 284 | + # the request. We only want a loose reference to jump between the |
| 285 | + # traces. |
| 286 | + # |
| 287 | + # For example, when making a `/messages` request, when approaching a |
| 288 | + # gap, we may kick off a background process to fetch missing events |
| 289 | + # from federation. The `/messages` request trace should't include |
| 290 | + # the entire time taken and details around fetching the missing |
| 291 | + # events since the request doesn't rely on the result, it was just |
| 292 | + # part of the heuristic to initiate things. |
| 293 | + # |
| 294 | + # We don't care about the value from the context manager as it's not |
| 295 | + # used (so we just use `Any` for the type). Ideally, we'd be able to |
| 296 | + # mark this as unused like an `assert_never` of sorts. |
| 297 | + tracing_scope: ContextManager[Any] |
| 298 | + if original_active_tracing_span is not None: |
| 299 | + # With the OpenTracing client that we're using, it's impossible to |
| 300 | + # create a disconnected root span while also providing `references` |
| 301 | + # so we first create a bare root span, then create a child span that |
| 302 | + # includes the references that we want. |
| 303 | + root_tracing_scope = start_active_span( |
| 304 | + f"bgproc.{desc}", |
| 305 | + tags={SynapseTags.REQUEST_ID: str(logging_context)}, |
| 306 | + # Create a root span for the background process (disconnected |
| 307 | + # from other spans) |
| 308 | + ignore_active_span=True, |
| 309 | + ) |
| 310 | + |
| 311 | + # Also add a span in the original request trace that cross-links |
| 312 | + # to background process trace. We immediately finish the span as |
| 313 | + # this is just a marker to follow where the real work is being |
| 314 | + # done. |
| 315 | + # |
| 316 | + # In OpenTracing, `FOLLOWS_FROM` indicates parent-child |
| 317 | + # relationship whereas we just want a cross-link to the |
| 318 | + # downstream trace. This is a bit hacky, but the closest we |
| 319 | + # can get to in OpenTracing land. If we ever migrate to |
| 320 | + # OpenTelemetry, we should use a normal `Link` for this. |
| 321 | + with start_active_span_follows_from( |
| 322 | + f"start_bgproc.{desc}", |
| 323 | + child_of=original_active_tracing_span, |
| 324 | + ignore_active_span=True, |
| 325 | + # Points to the background process span. |
| 326 | + contexts=[root_tracing_scope.span.context], |
| 327 | + ): |
| 328 | + pass |
| 329 | + |
| 330 | + # Then start the tracing scope that we're going to use for |
| 331 | + # the duration of the background process within the root |
| 332 | + # span we just created. |
| 333 | + child_tracing_scope = start_active_span_follows_from( |
| 334 | + f"bgproc_child.{desc}", |
| 335 | + child_of=root_tracing_scope.span, |
| 336 | + ignore_active_span=True, |
| 337 | + tags={SynapseTags.REQUEST_ID: str(logging_context)}, |
| 338 | + # Create the `FOLLOWS_FROM` reference to the request's |
| 339 | + # span so there is a loose coupling between the two |
| 340 | + # traces and it's easy to jump between. |
| 341 | + contexts=[original_active_tracing_span.context], |
| 342 | + ) |
| 343 | + |
| 344 | + # For easy usage down below, we create a context manager that |
| 345 | + # combines both scopes. |
| 346 | + @contextmanager |
| 347 | + def combined_context_manager() -> Generator[None, None, None]: |
| 348 | + with root_tracing_scope, child_tracing_scope: |
| 349 | + yield |
| 350 | + |
| 351 | + tracing_scope = combined_context_manager() |
| 352 | + |
| 353 | + else: |
| 354 | + # Otherwise, when there is no active span, we will be creating |
| 355 | + # a disconnected root span already and we don't have to |
| 356 | + # worry about cross-linking to anything. |
| 357 | + tracing_scope = start_active_span( |
| 358 | + f"bgproc.{desc}", |
| 359 | + tags={SynapseTags.REQUEST_ID: str(logging_context)}, |
| 360 | + ) |
273 | 361 | else: |
274 | | - ctx = nullcontext() # type: ignore[assignment] |
275 | | - with ctx: |
| 362 | + tracing_scope = nullcontext() |
| 363 | + |
| 364 | + with tracing_scope: |
276 | 365 | return await func(*args, **kwargs) |
277 | 366 | except Exception: |
278 | 367 | logger.exception( |
|
0 commit comments