Skip to content
Merged
Show file tree
Hide file tree
Changes from 32 commits
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
6896221
Start of lint for checking whether Prometheus metrics have the `serve…
MadLittleMods Jul 4, 2025
ada39e3
Add more comments
MadLittleMods Jul 4, 2025
0c45aab
Add specific error code
MadLittleMods Jul 4, 2025
4b3f042
Merge branch 'develop' into madlittlemods/18592-metric-lint-labels
MadLittleMods Jul 4, 2025
6e74eca
Revert temp changes to `synapse/util/ratelimitutils.py`
MadLittleMods Jul 4, 2025
7651a44
Indicate future plan
MadLittleMods Jul 4, 2025
944abdd
Adjust errors
MadLittleMods Jul 4, 2025
143fa3f
Explain when it's okay to ignore lint
MadLittleMods Jul 4, 2025
f66758f
Use `SERVER_NAME_LABEL`
MadLittleMods Jul 4, 2025
3705aec
Support `NameExpr` `SERVER_NAME_LABEL` in lint
MadLittleMods Jul 4, 2025
2ba8a31
Fill in `synapse/notifier.py`
MadLittleMods Jul 4, 2025
a28be1a
Fill in `synapse/appservice/api.py`
MadLittleMods Jul 4, 2025
48782ac
Fill in `synapse/federation/federation_client.py`
MadLittleMods Jul 4, 2025
cdbeb4f
Fill in `synapse/federation/federation_server.py`
MadLittleMods Jul 4, 2025
5cd7f3e
Fill in `synapse/federation/sender/__init__.py`
MadLittleMods Jul 4, 2025
6847cc3
Fill in `synapse/federation/sender/per_destination_queue.py`
MadLittleMods Jul 4, 2025
70547f3
Fill in `synapse/handlers/appservice.py`
MadLittleMods Jul 4, 2025
ecbcdea
Fill in `synapse/handlers/auth.py`
MadLittleMods Jul 4, 2025
6e014d3
Fill in `synapse/handlers/federation_event.py`
MadLittleMods Jul 4, 2025
1fadfed
Fill in `synapse/handlers/presence.py`
MadLittleMods Jul 4, 2025
a6a6863
Fill in `synapse/handlers/register.py`
MadLittleMods Jul 4, 2025
e81be25
Fill in `synapse/handlers/sync.py`
MadLittleMods Jul 4, 2025
d42e742
Fill in `synapse/http/client.py`
MadLittleMods Jul 4, 2025
f197c92
Fill in `synapse/http/matrixfederationclient.py`
MadLittleMods Jul 4, 2025
c28d4bc
Fill in `synapse/http/request_metrics.py`
MadLittleMods Jul 4, 2025
fb5e1f2
Fix `synapse/federation/sender/per_destination_queue.py`
MadLittleMods Jul 4, 2025
49f2098
Fix mypy complaining about `request_metrics` `has-type` lints (part o…
MadLittleMods Jul 4, 2025
347e66d
Fill in `synapse/metrics/__init__.py`
MadLittleMods Jul 4, 2025
88bf526
Fill in `synapse/push/bulk_push_rule_evaluator.py`
MadLittleMods Jul 4, 2025
3cb1d28
Restore and standardize comment for unused metrics
MadLittleMods Jul 4, 2025
c2cb024
Fill in `synapse/push/httppusher.py`
MadLittleMods Jul 4, 2025
e469abe
Fill in `synapse/push/mailer.py`
MadLittleMods Jul 4, 2025
f49c793
Fill in `synapse/replication/http/_base.py`
MadLittleMods Jul 4, 2025
73f05ec
Fill in `synapse/replication/tcp/external_cache.py`
MadLittleMods Jul 4, 2025
7523a5a
Fill in `synapse/replication/tcp/handler.py`
MadLittleMods Jul 4, 2025
3fd2cf4
Fill in `synapse/replication/tcp/protocol.py`
MadLittleMods Jul 4, 2025
513249b
Fill in `synapse/replication/tcp/resource.py`
MadLittleMods Jul 4, 2025
e0ffa36
Fill in `synapse/state/__init__.py`
MadLittleMods Jul 4, 2025
bc17483
Fill in `synapse/storage/database.py`
MadLittleMods Jul 4, 2025
36a7a89
Fill in `synapse/storage/controllers/persist_events.py`
MadLittleMods Jul 4, 2025
24f3e0a
Fill in `synapse/storage/databases/main/event_federation.py`
MadLittleMods Jul 4, 2025
05ef5e2
Fix `server_name` atribute problems with database classes (`EventFede…
MadLittleMods Jul 4, 2025
05ecf5c
Fill in `synapse/storage/databases/main/events.py`
MadLittleMods Jul 4, 2025
732f605
Fill in `synapse/util/ratelimitutils.py`
MadLittleMods Jul 4, 2025
5fb3acb
Fill in `synapse/metrics/background_process_metrics.py`
MadLittleMods Jul 7, 2025
b396b7c
Fill in `synapse/metrics/background_process_metrics.py` (`run_as_back…
MadLittleMods Jul 7, 2025
94062a0
Merge branch 'develop' into madlittlemods/18592-metric-lint-labels
MadLittleMods Jul 18, 2025
268ba92
Merge branch 'develop' into madlittlemods/18592-metric-lint-labels
MadLittleMods Jul 23, 2025
ecb479e
Add changelog
MadLittleMods Jul 23, 2025
1b03330
Fix `make_request(...)` in tests
MadLittleMods Jul 23, 2025
6545c37
Merge branch 'develop' into madlittlemods/18592-metric-lint-labels
MadLittleMods Jul 24, 2025
822935c
Remove debug log
MadLittleMods Jul 24, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 105 additions & 2 deletions scripts-dev/mypy_synapse_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,13 @@
import mypy.types
from mypy.erasetype import remove_instance_last_known_values
from mypy.errorcodes import ErrorCode
from mypy.nodes import ARG_NAMED_OPT, TempNode, Var
from mypy.plugin import FunctionSigContext, MethodSigContext, Plugin
from mypy.nodes import ARG_NAMED_OPT, ListExpr, NameExpr, TempNode, Var
from mypy.plugin import (
FunctionLike,
FunctionSigContext,
MethodSigContext,
Plugin,
)
from mypy.typeops import bind_self
from mypy.types import (
AnyType,
Expand All @@ -43,11 +48,30 @@
UnionType,
)

PROMETHEUS_METRIC_MISSING_SERVER_NAME_LABEL = ErrorCode(
"missing-server-name-label",
"`SERVER_NAME_LABEL` required in metric",
category="per-homeserver-tenant-metrics",
)


class SynapsePlugin(Plugin):
def get_function_signature_hook(
self, fullname: str
) -> Optional[Callable[[FunctionSigContext], FunctionLike]]:
if fullname in (
"prometheus_client.metrics.Counter",
# TODO: Add other prometheus_client metrics that need checking as we
# refactor, see https://github.com/element-hq/synapse/issues/18592
):
return check_prometheus_metric_instantiation

return None

def get_method_signature_hook(
self, fullname: str
) -> Optional[Callable[[MethodSigContext], CallableType]]:
# print(f"m fullname={fullname}")
if fullname.startswith(
(
"synapse.util.caches.descriptors.CachedFunction.__call__",
Expand All @@ -65,6 +89,85 @@ def get_method_signature_hook(
return None


def check_prometheus_metric_instantiation(ctx: FunctionSigContext) -> CallableType:
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

New linting to prevent new Counter metrics from being introduced without the SERVER_NAME_LABEL

"""
Ensure that the `prometheus_client` metrics include the `SERVER_NAME_LABEL` label
when instantiated.

This is important because we support multiple Synapse instances running in the same
process, where all metrics share a single global `REGISTRY`. The `server_name` label
ensures metrics are correctly separated by homeserver.

There are also some metrics that apply at the process level, such as CPU usage,
Python garbage collection, Twisted reactor tick time which shouldn't have the
`SERVER_NAME_LABEL`. In those cases, use use a type ignore comment to disable the
check, e.g. `# type: ignore[missing-server-name-label]`.
"""
# The true signature, this isn't being modified so this is what will be returned.
signature: CallableType = ctx.default_signature

# Sanity check the arguments are still as expected in this version of
# `prometheus_client`. ex. `Counter(name, documentation, labelnames, ...)`
#
# `signature.arg_names` should be: ["name", "documentation", "labelnames", ...]
if len(signature.arg_names) < 3 or signature.arg_names[2] != "labelnames":
ctx.api.fail(
f"Expected the 3rd argument of {signature.name} to be 'labelnames', but got "
f"{signature.arg_names[2]}",
ctx.context,
)
return signature

# Ensure mypy is passing the correct number of arguments because we are doing some
# dirty indexing into `ctx.args` later on.
assert len(ctx.args) == len(signature.arg_names), (
f"Expected the list of arguments in the {signature.name} signature ({len(signature.arg_names)})"
f"to match the number of arguments from the function signature context ({len(ctx.args)})"
)

# Check if the `labelnames` argument includes `SERVER_NAME_LABEL`
#
# `ctx.args` should look like this:
# ```
# [
# [StrExpr("name")],
# [StrExpr("documentation")],
# [ListExpr([StrExpr("label1"), StrExpr("label2")])]
# ...
# ]
# ```
labelnames_arg_expression = ctx.args[2][0] if len(ctx.args[2]) > 0 else None
if isinstance(labelnames_arg_expression, ListExpr):
# Check if the `labelnames` argument includes the `server_name` label (`SERVER_NAME_LABEL`).
for labelname_expression in labelnames_arg_expression.items:
if (
isinstance(labelname_expression, NameExpr)
and labelname_expression.fullname == "synapse.metrics.SERVER_NAME_LABEL"
):
# Found the `SERVER_NAME_LABEL`, all good!
break
else:
ctx.api.fail(
f"Expected {signature.name} to include `SERVER_NAME_LABEL` in the list of labels. "
"If this is a process-level metric (vs homeserver-level), use a type ignore comment "
"to disable this check.",
ctx.context,
code=PROMETHEUS_METRIC_MISSING_SERVER_NAME_LABEL,
)
else:
ctx.api.fail(
f"Expected the `labelnames` argument of {signature.name} to be a list of label names "
f"(including `SERVER_NAME_LABEL`), but got {labelnames_arg_expression}. "
"If this is a process-level metric (vs homeserver-level), use a type ignore comment "
"to disable this check.",
ctx.context,
code=PROMETHEUS_METRIC_MISSING_SERVER_NAME_LABEL,
)
return signature

return signature


def _get_true_return_type(signature: CallableType) -> mypy.types.Type:
"""
Get the "final" return type of a callable which might return an Awaitable/Deferred.
Expand Down
25 changes: 15 additions & 10 deletions synapse/appservice/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
from synapse.events.utils import SerializeEventConfig, serialize_event
from synapse.http.client import SimpleHttpClient, is_unknown_endpoint
from synapse.logging import opentracing
from synapse.metrics import SERVER_NAME_LABEL
from synapse.types import DeviceListUpdates, JsonDict, JsonMapping, ThirdPartyInstanceID
from synapse.util.caches.response_cache import ResponseCache

Expand All @@ -59,29 +60,31 @@
sent_transactions_counter = Counter(
"synapse_appservice_api_sent_transactions",
"Number of /transactions/ requests sent",
["service"],
labelnames=["service", SERVER_NAME_LABEL],
)

failed_transactions_counter = Counter(
"synapse_appservice_api_failed_transactions",
"Number of /transactions/ requests that failed to send",
["service"],
labelnames=["service", SERVER_NAME_LABEL],
)

sent_events_counter = Counter(
"synapse_appservice_api_sent_events", "Number of events sent to the AS", ["service"]
"synapse_appservice_api_sent_events",
"Number of events sent to the AS",
labelnames=["service", SERVER_NAME_LABEL],
)

sent_ephemeral_counter = Counter(
"synapse_appservice_api_sent_ephemeral",
"Number of ephemeral events sent to the AS",
["service"],
labelnames=["service", SERVER_NAME_LABEL],
)

sent_todevice_counter = Counter(
"synapse_appservice_api_sent_todevice",
"Number of todevice messages sent to the AS",
["service"],
labelnames=["service", SERVER_NAME_LABEL],
)

HOUR_IN_MS = 60 * 60 * 1000
Expand Down Expand Up @@ -126,6 +129,7 @@ class ApplicationServiceApi(SimpleHttpClient):

def __init__(self, hs: "HomeServer"):
super().__init__(hs)
self.server_name = hs.hostname
self.clock = hs.get_clock()
self.config = hs.config.appservice

Expand Down Expand Up @@ -378,6 +382,7 @@ async def push_bulk(
"left": list(device_list_summary.left),
}

labels = {"service": service.id, SERVER_NAME_LABEL: self.server_name}
try:
args = None
if self.config.use_appservice_legacy_authorization:
Expand All @@ -395,10 +400,10 @@ async def push_bulk(
service.url,
[event.get("event_id") for event in events],
)
sent_transactions_counter.labels(service.id).inc()
sent_events_counter.labels(service.id).inc(len(serialized_events))
sent_ephemeral_counter.labels(service.id).inc(len(ephemeral))
sent_todevice_counter.labels(service.id).inc(len(to_device_messages))
sent_transactions_counter.labels(**labels).inc()
sent_events_counter.labels(**labels).inc(len(serialized_events))
sent_ephemeral_counter.labels(**labels).inc(len(ephemeral))
sent_todevice_counter.labels(**labels).inc(len(to_device_messages))
return True
except CodeMessageException as e:
logger.warning(
Expand All @@ -417,7 +422,7 @@ async def push_bulk(
ex.args,
exc_info=logger.isEnabledFor(logging.DEBUG),
)
failed_transactions_counter.labels(service.id).inc()
failed_transactions_counter.labels(**labels).inc()
return False

async def claim_client_keys(
Expand Down
29 changes: 22 additions & 7 deletions synapse/federation/federation_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@
from synapse.http.client import is_unknown_endpoint
from synapse.http.types import QueryParams
from synapse.logging.opentracing import SynapseTags, log_kv, set_tag, tag_args, trace
from synapse.metrics import SERVER_NAME_LABEL
from synapse.types import JsonDict, StrCollection, UserID, get_domain_from_id
from synapse.types.handlers.policy_server import RECOMMENDATION_OK, RECOMMENDATION_SPAM
from synapse.util.async_helpers import concurrently_execute
Expand All @@ -85,7 +86,9 @@

logger = logging.getLogger(__name__)

sent_queries_counter = Counter("synapse_federation_client_sent_queries", "", ["type"])
sent_queries_counter = Counter(
"synapse_federation_client_sent_queries", "", labelnames=["type", SERVER_NAME_LABEL]
)


PDU_RETRY_TIME_MS = 1 * 60 * 1000
Expand Down Expand Up @@ -137,7 +140,7 @@ def __init__(self, hs: "HomeServer"):
self.state = hs.get_state_handler()
self.transport_layer = hs.get_federation_transport_client()

self.hostname = hs.hostname
self.server_name = hs.hostname
self.signing_key = hs.signing_key

# Cache mapping `event_id` to a tuple of the event itself and the `pull_origin`
Expand Down Expand Up @@ -207,7 +210,10 @@ async def make_query(
Returns:
The JSON object from the response
"""
sent_queries_counter.labels(query_type).inc()
sent_queries_counter.labels(
type=query_type,
**{SERVER_NAME_LABEL: self.server_name},
).inc()

return await self.transport_layer.make_query(
destination,
Expand All @@ -229,7 +235,10 @@ async def query_client_keys(
Returns:
The JSON object from the response
"""
sent_queries_counter.labels("client_device_keys").inc()
sent_queries_counter.labels(
type="client_device_keys",
**{SERVER_NAME_LABEL: self.server_name},
).inc()
return await self.transport_layer.query_client_keys(
destination, content, timeout
)
Expand All @@ -240,7 +249,10 @@ async def query_user_devices(
"""Query the device keys for a list of user ids hosted on a remote
server.
"""
sent_queries_counter.labels("user_devices").inc()
sent_queries_counter.labels(
type="user_devices",
**{SERVER_NAME_LABEL: self.server_name},
).inc()
return await self.transport_layer.query_user_devices(
destination, user_id, timeout
)
Expand All @@ -262,7 +274,10 @@ async def claim_client_keys(
Returns:
The JSON object from the response
"""
sent_queries_counter.labels("client_one_time_keys").inc()
sent_queries_counter.labels(
type="client_one_time_keys",
**{SERVER_NAME_LABEL: self.server_name},
).inc()

# Convert the query with counts into a stable and unstable query and check
# if attempting to claim more than 1 OTK.
Expand Down Expand Up @@ -1068,7 +1083,7 @@ async def send_request(destination: str) -> Tuple[str, EventBase, RoomVersion]:
# there's some we never care about
ev = builder.create_local_event_from_event_dict(
self._clock,
self.hostname,
self.server_name,
self.signing_key,
room_version=room_version,
event_dict=pdu_dict,
Expand Down
24 changes: 18 additions & 6 deletions synapse/federation/federation_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@
tag_args,
trace,
)
from synapse.metrics import SERVER_NAME_LABEL
from synapse.metrics.background_process_metrics import wrap_as_background_process
from synapse.replication.http.federation import (
ReplicationFederationSendEduRestServlet,
Expand All @@ -105,12 +106,18 @@

logger = logging.getLogger(__name__)

received_pdus_counter = Counter("synapse_federation_server_received_pdus", "")
received_pdus_counter = Counter(
"synapse_federation_server_received_pdus", "", labelnames=[SERVER_NAME_LABEL]
)

received_edus_counter = Counter("synapse_federation_server_received_edus", "")
received_edus_counter = Counter(
"synapse_federation_server_received_edus", "", labelnames=[SERVER_NAME_LABEL]
)

received_queries_counter = Counter(
"synapse_federation_server_received_queries", "", ["type"]
"synapse_federation_server_received_queries",
"",
labelnames=["type", SERVER_NAME_LABEL],
)

pdu_process_time = Histogram(
Expand Down Expand Up @@ -424,7 +431,9 @@ async def _handle_pdus_in_txn(
report back to the sending server.
"""

received_pdus_counter.inc(len(transaction.pdus))
received_pdus_counter.labels(**{SERVER_NAME_LABEL: self.server_name}).inc(
len(transaction.pdus)
)

origin_host, _ = parse_server_name(origin)

Expand Down Expand Up @@ -543,7 +552,7 @@ async def _handle_edus_in_txn(self, origin: str, transaction: Transaction) -> No
"""Process the EDUs in a received transaction."""

async def _process_edu(edu_dict: JsonDict) -> None:
received_edus_counter.inc()
received_edus_counter.labels(**{SERVER_NAME_LABEL: self.server_name}).inc()

edu = Edu(
origin=origin,
Expand Down Expand Up @@ -658,7 +667,10 @@ async def on_pdu_request(
async def on_query_request(
self, query_type: str, args: Dict[str, str]
) -> Tuple[int, Dict[str, Any]]:
received_queries_counter.labels(query_type).inc()
received_queries_counter.labels(
type=query_type,
**{SERVER_NAME_LABEL: self.server_name},
).inc()
resp = await self.registry.on_query(query_type, args)
return 200, resp

Expand Down
Loading
Loading