Skip to content

Commit 396de65

Browse files
Cleanly shutdown SynapseHomeServer object (#18828)
This PR aims to allow for a clean shutdown of the `SynapseHomeServer` object so that it can be fully deleted and cleaned up by garbage collection without shutting down the entire python process. Fix element-hq/synapse-small-hosts#50 ### Pull Request Checklist <!-- Please read https://element-hq.github.io/synapse/latest/development/contributing_guide.html before submitting your pull request --> * [x] Pull request is based on the develop branch * [x] Pull request includes a [changelog file](https://element-hq.github.io/synapse/latest/development/contributing_guide.html#changelog). The entry should: - Be a short description of your change which makes sense to users. "Fixed a bug that prevented receiving messages from other servers." instead of "Moved X method from `EventStore` to `EventWorkerStore`.". - Use markdown where necessary, mostly for `code blocks`. - End with either a period (.) or an exclamation mark (!). - Start with a capital letter. - Feel free to credit yourself, by adding a sentence "Contributed by @github_username." or "Contributed by [Your Name]." to the end of the entry. * [x] [Code style](https://element-hq.github.io/synapse/latest/code_style.html) is correct (run the [linters](https://element-hq.github.io/synapse/latest/development/contributing_guide.html#run-the-linters)) --------- Co-authored-by: Eric Eastwood <erice@element.io>
1 parent d1c96ee commit 396de65

135 files changed

Lines changed: 2193 additions & 759 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

changelog.d/18828.feature

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Cleanly shutdown `SynapseHomeServer` object.

scripts-dev/mypy_synapse_plugin.py

Lines changed: 138 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,18 +68,42 @@
6868
category="per-homeserver-tenant-metrics",
6969
)
7070

71+
PREFER_SYNAPSE_CLOCK_CALL_LATER = ErrorCode(
72+
"call-later-not-tracked",
73+
"Prefer using `synapse.util.Clock.call_later` instead of `reactor.callLater`",
74+
category="synapse-reactor-clock",
75+
)
76+
77+
PREFER_SYNAPSE_CLOCK_LOOPING_CALL = ErrorCode(
78+
"prefer-synapse-clock-looping-call",
79+
"Prefer using `synapse.util.Clock.looping_call` instead of `task.LoopingCall`",
80+
category="synapse-reactor-clock",
81+
)
82+
7183
PREFER_SYNAPSE_CLOCK_CALL_WHEN_RUNNING = ErrorCode(
7284
"prefer-synapse-clock-call-when-running",
73-
"`synapse.util.Clock.call_when_running` should be used instead of `reactor.callWhenRunning`",
85+
"Prefer using `synapse.util.Clock.call_when_running` instead of `reactor.callWhenRunning`",
7486
category="synapse-reactor-clock",
7587
)
7688

7789
PREFER_SYNAPSE_CLOCK_ADD_SYSTEM_EVENT_TRIGGER = ErrorCode(
7890
"prefer-synapse-clock-add-system-event-trigger",
79-
"`synapse.util.Clock.add_system_event_trigger` should be used instead of `reactor.addSystemEventTrigger`",
91+
"Prefer using `synapse.util.Clock.add_system_event_trigger` instead of `reactor.addSystemEventTrigger`",
8092
category="synapse-reactor-clock",
8193
)
8294

95+
MULTIPLE_INTERNAL_CLOCKS_CREATED = ErrorCode(
96+
"multiple-internal-clocks",
97+
"Only one instance of `clock.Clock` should be created",
98+
category="synapse-reactor-clock",
99+
)
100+
101+
UNTRACKED_BACKGROUND_PROCESS = ErrorCode(
102+
"untracked-background-process",
103+
"Prefer using `HomeServer.run_as_background_process` method over the bare `run_as_background_process`",
104+
category="synapse-tracked-calls",
105+
)
106+
83107

84108
class Sentinel(enum.Enum):
85109
# defining a sentinel in this way allows mypy to correctly handle the
@@ -222,6 +246,18 @@ def get_function_signature_hook(
222246
# callback, let's just pass it in while we have it.
223247
return lambda ctx: check_prometheus_metric_instantiation(ctx, fullname)
224248

249+
if fullname == "twisted.internet.task.LoopingCall":
250+
return check_looping_call
251+
252+
if fullname == "synapse.util.clock.Clock":
253+
return check_clock_creation
254+
255+
if (
256+
fullname
257+
== "synapse.metrics.background_process_metrics.run_as_background_process"
258+
):
259+
return check_background_process
260+
225261
return None
226262

227263
def get_method_signature_hook(
@@ -241,6 +277,13 @@ def get_method_signature_hook(
241277
):
242278
return check_is_cacheable_wrapper
243279

280+
if fullname in (
281+
"twisted.internet.interfaces.IReactorTime.callLater",
282+
"synapse.types.ISynapseThreadlessReactor.callLater",
283+
"synapse.types.ISynapseReactor.callLater",
284+
):
285+
return check_call_later
286+
244287
if fullname in (
245288
"twisted.internet.interfaces.IReactorCore.callWhenRunning",
246289
"synapse.types.ISynapseThreadlessReactor.callWhenRunning",
@@ -258,6 +301,78 @@ def get_method_signature_hook(
258301
return None
259302

260303

304+
def check_clock_creation(ctx: FunctionSigContext) -> CallableType:
305+
"""
306+
Ensure that the only `clock.Clock` instance is the one used by the `HomeServer`.
307+
This is so that the `HomeServer` can cancel any tracked delayed or looping calls
308+
during server shutdown.
309+
310+
Args:
311+
ctx: The `FunctionSigContext` from mypy.
312+
"""
313+
signature: CallableType = ctx.default_signature
314+
ctx.api.fail(
315+
"Expected the only `clock.Clock` instance to be the one used by the `HomeServer`. "
316+
"This is so that the `HomeServer` can cancel any tracked delayed or looping calls "
317+
"during server shutdown",
318+
ctx.context,
319+
code=MULTIPLE_INTERNAL_CLOCKS_CREATED,
320+
)
321+
322+
return signature
323+
324+
325+
def check_call_later(ctx: MethodSigContext) -> CallableType:
326+
"""
327+
Ensure that the `reactor.callLater` callsites aren't used.
328+
329+
`synapse.util.Clock.call_later` should always be used instead of `reactor.callLater`.
330+
This is because the `synapse.util.Clock` tracks delayed calls in order to cancel any
331+
outstanding calls during server shutdown. Delayed calls which are either short lived
332+
(<~60s) or frequently called and can be tracked via other means could be candidates for
333+
using `synapse.util.Clock.call_later` with `call_later_cancel_on_shutdown` set to
334+
`False`. There shouldn't be a need to use `reactor.callLater` outside of tests or the
335+
`Clock` class itself. If a need arises, you can use a type ignore comment to disable the
336+
check, e.g. `# type: ignore[call-later-not-tracked]`.
337+
338+
Args:
339+
ctx: The `FunctionSigContext` from mypy.
340+
"""
341+
signature: CallableType = ctx.default_signature
342+
ctx.api.fail(
343+
"Expected all `reactor.callLater` calls to use `synapse.util.Clock.call_later` "
344+
"instead. This is so that long lived calls can be tracked for cancellation during "
345+
"server shutdown",
346+
ctx.context,
347+
code=PREFER_SYNAPSE_CLOCK_CALL_LATER,
348+
)
349+
350+
return signature
351+
352+
353+
def check_looping_call(ctx: FunctionSigContext) -> CallableType:
354+
"""
355+
Ensure that the `task.LoopingCall` callsites aren't used.
356+
357+
`synapse.util.Clock.looping_call` should always be used instead of `task.LoopingCall`.
358+
`synapse.util.Clock` tracks looping calls in order to cancel any outstanding calls
359+
during server shutdown.
360+
361+
Args:
362+
ctx: The `FunctionSigContext` from mypy.
363+
"""
364+
signature: CallableType = ctx.default_signature
365+
ctx.api.fail(
366+
"Expected all `task.LoopingCall` instances to use `synapse.util.Clock.looping_call` "
367+
"instead. This is so that long lived calls can be tracked for cancellation during "
368+
"server shutdown",
369+
ctx.context,
370+
code=PREFER_SYNAPSE_CLOCK_LOOPING_CALL,
371+
)
372+
373+
return signature
374+
375+
261376
def check_call_when_running(ctx: MethodSigContext) -> CallableType:
262377
"""
263378
Ensure that the `reactor.callWhenRunning` callsites aren't used.
@@ -312,6 +427,27 @@ def check_add_system_event_trigger(ctx: MethodSigContext) -> CallableType:
312427
return signature
313428

314429

430+
def check_background_process(ctx: FunctionSigContext) -> CallableType:
431+
"""
432+
Ensure that calls to `run_as_background_process` use the `HomeServer` method.
433+
This is so that the `HomeServer` can cancel any running background processes during
434+
server shutdown.
435+
436+
Args:
437+
ctx: The `FunctionSigContext` from mypy.
438+
"""
439+
signature: CallableType = ctx.default_signature
440+
ctx.api.fail(
441+
"Prefer using `HomeServer.run_as_background_process` method over the bare "
442+
"`run_as_background_process`. This is so that the `HomeServer` can cancel "
443+
"any background processes during server shutdown",
444+
ctx.context,
445+
code=UNTRACKED_BACKGROUND_PROCESS,
446+
)
447+
448+
return signature
449+
450+
315451
def analyze_prometheus_metric_classes(ctx: ClassDefContext) -> None:
316452
"""
317453
Cross-check the list of Prometheus metric classes against the

synapse/_scripts/generate_workers_map.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,12 @@ def get_registered_paths_for_default(
157157
# TODO We only do this to avoid an error, but don't need the database etc
158158
hs.setup()
159159
registered_paths = get_registered_paths_for_hs(hs)
160-
hs.cleanup()
160+
# NOTE: a more robust implementation would properly shutdown/cleanup each server
161+
# to avoid resource buildup.
162+
# However, the call to `shutdown` is `async` so it would require additional complexity here.
163+
# We are intentionally skipping this cleanup because this is a short-lived, one-off
164+
# utility script where the simpler approach is sufficient and we shouldn't run into
165+
# any resource buildup issues.
161166

162167
return registered_paths
163168

synapse/_scripts/update_synapse_database.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@
2828
from twisted.internet import defer, reactor as reactor_
2929

3030
from synapse.config.homeserver import HomeServerConfig
31-
from synapse.metrics.background_process_metrics import run_as_background_process
3231
from synapse.server import HomeServer
3332
from synapse.storage import DataStore
3433
from synapse.types import ISynapseReactor
@@ -53,7 +52,6 @@ def __init__(self, config: HomeServerConfig):
5352

5453

5554
def run_background_updates(hs: HomeServer) -> None:
56-
server_name = hs.hostname
5755
main = hs.get_datastores().main
5856
state = hs.get_datastores().state
5957

@@ -67,9 +65,8 @@ async def run_background_updates() -> None:
6765
def run() -> None:
6866
# Apply all background updates on the database.
6967
defer.ensureDeferred(
70-
run_as_background_process(
68+
hs.run_as_background_process(
7169
"background_updates",
72-
server_name,
7370
run_background_updates,
7471
)
7572
)

0 commit comments

Comments
 (0)