Skip to content
Merged
Show file tree
Hide file tree
Changes from 30 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
4df927b
Add in base linting for metrics
MadLittleMods Jul 23, 2025
cec25dd
Support `labelnames` argument being a Tuple expression
MadLittleMods Jul 23, 2025
d62d564
Make label name arg position/name generic
MadLittleMods Jul 24, 2025
325caec
Update wording
MadLittleMods Jul 24, 2025
53bfbc6
Explain why `prometheus_client.metrics_core.Metric` isn't included here
MadLittleMods Jul 24, 2025
395b33b
Disambiguate
MadLittleMods Jul 24, 2025
7818657
Remove debug log
MadLittleMods Jul 24, 2025
6bb5fef
Fix lints in in lint
MadLittleMods Jul 24, 2025
b073bb1
Merge branch 'develop' into madlittlemods/18592-lint-metric
MadLittleMods Jul 25, 2025
6758742
Add cross-check for `prometheus_metric_fullname_to_label_arg_map`
MadLittleMods Jul 28, 2025
c53d34d
Ignore some process-level metrics
MadLittleMods Jul 28, 2025
07e79fe
Ignore process-level metrics
MadLittleMods Jul 28, 2025
4902046
Fix `JemallocCollector` edge case
MadLittleMods Jul 28, 2025
da38631
Support almost all metric types
MadLittleMods Jul 28, 2025
b46040a
Add some cross-links
MadLittleMods Jul 28, 2025
8d706ac
Lint `Metric` types
MadLittleMods Jul 28, 2025
478edc2
Ignore `Metric` usage where necessary
MadLittleMods Jul 28, 2025
62ce1e0
Add changelog
MadLittleMods Jul 28, 2025
9166d55
Remove irrelevant comment
MadLittleMods Jul 29, 2025
2d947a4
Beef up comment
MadLittleMods Jul 29, 2025
89076a4
Mark off TODO about `Metric`
MadLittleMods Jul 29, 2025
a90e19b
Merge branch 'develop' into madlittlemods/18592-lint-metric
MadLittleMods Jul 29, 2025
44e30cf
Merge branch 'develop' into madlittlemods/18592-lint-metric
MadLittleMods Jul 29, 2025
86a0bcd
Merge branch 'develop' into madlittlemods/18592-lint-metric
MadLittleMods Jul 29, 2025
ab5a756
Uncomment merged lints
MadLittleMods Jul 29, 2025
8c380c0
Merge branch 'develop' into madlittlemods/18592-lint-metric
MadLittleMods Jul 29, 2025
bec7bfb
We can now use the full list
MadLittleMods Jul 29, 2025
21ef4b4
Workaround mypy limitation only calling `get_base_class_hook` for one…
MadLittleMods Jul 31, 2025
61a6f39
More sound logic
MadLittleMods Jul 31, 2025
4965340
Use lint instead of assert
MadLittleMods Jul 31, 2025
a7573a7
Make sure pydantic mypy hooks are still being called
MadLittleMods Aug 1, 2025
d2efe1d
Add more explicit instructions
MadLittleMods Aug 1, 2025
f2f1b9e
Merge branch 'develop' into madlittlemods/18592-lint-metric
MadLittleMods Aug 1, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/18733.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Update metrics linting to be able to handle custom metrics.
5 changes: 4 additions & 1 deletion mypy.ini
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[mypy]
namespace_packages = True
plugins = pydantic.mypy, mypy_zope:plugin, scripts-dev/mypy_synapse_plugin.py
plugins = scripts-dev/mypy_synapse_plugin.py, pydantic.mypy, mypy_zope:plugin
follow_imports = normal
show_error_codes = True
show_traceback = True
Expand Down Expand Up @@ -99,3 +99,6 @@ ignore_missing_imports = True

[mypy-multipart.*]
ignore_missing_imports = True

[mypy-mypy_zope.*]
ignore_missing_imports = True
225 changes: 210 additions & 15 deletions scripts-dev/mypy_synapse_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,17 @@
can crop up, e.g the cache descriptors.
"""

from typing import Callable, Optional, Tuple, Type, Union
import enum
from typing import Callable, Mapping, Optional, Tuple, Type, Union

import attr
import mypy.types
from mypy.erasetype import remove_instance_last_known_values
from mypy.errorcodes import ErrorCode
from mypy.nodes import ARG_NAMED_OPT, ListExpr, NameExpr, TempNode, TupleExpr, Var
from mypy.plugin import (
ClassDefContext,
Context,
FunctionLike,
FunctionSigContext,
MethodSigContext,
Expand All @@ -41,32 +45,149 @@
CallableType,
Instance,
NoneType,
Options,
TupleType,
TypeAliasType,
TypeVarType,
UninhabitedType,
UnionType,
)
from mypy_zope import plugin as mypy_zope_plugin

PROMETHEUS_METRIC_MISSING_SERVER_NAME_LABEL = ErrorCode(
"missing-server-name-label",
"`SERVER_NAME_LABEL` required in metric",
category="per-homeserver-tenant-metrics",
)

PROMETHEUS_METRIC_MISSING_FROM_LIST_TO_CHECK = ErrorCode(
"metric-type-missing-from-list",
"Every Prometheus metric type must be included in the `prometheus_metric_fullname_to_label_arg_map`.",
category="per-homeserver-tenant-metrics",
)


class Sentinel(enum.Enum):
# defining a sentinel in this way allows mypy to correctly handle the
# type of a dictionary lookup and subsequent type narrowing.
UNSET_SENTINEL = object()


@attr.s(auto_attribs=True)
class ArgLocation:
keyword_name: str
"""
The keyword argument name for this argument
"""
position: int
"""
The 0-based positional index of this argument
"""


prometheus_metric_fullname_to_label_arg_map: Mapping[str, Optional[ArgLocation]] = {
# `Collector` subclasses:
"prometheus_client.metrics.MetricWrapperBase": ArgLocation("labelnames", 2),
"prometheus_client.metrics.Counter": ArgLocation("labelnames", 2),
"prometheus_client.metrics.Histogram": ArgLocation("labelnames", 2),
"prometheus_client.metrics.Gauge": ArgLocation("labelnames", 2),
"prometheus_client.metrics.Summary": ArgLocation("labelnames", 2),
"prometheus_client.metrics.Info": ArgLocation("labelnames", 2),
"prometheus_client.metrics.Enum": ArgLocation("labelnames", 2),
"synapse.metrics.LaterGauge": ArgLocation("labelnames", 2),
"synapse.metrics.InFlightGauge": ArgLocation("labels", 2),
"synapse.metrics.GaugeBucketCollector": ArgLocation("labelnames", 2),
"prometheus_client.registry.Collector": None,
"prometheus_client.registry._EmptyCollector": None,
"prometheus_client.registry.CollectorRegistry": None,
"prometheus_client.process_collector.ProcessCollector": None,
"prometheus_client.platform_collector.PlatformCollector": None,
"prometheus_client.gc_collector.GCCollector": None,
"synapse.metrics._gc.GCCounts": None,
"synapse.metrics._gc.PyPyGCStats": None,
"synapse.metrics._reactor_metrics.ReactorLastSeenMetric": None,
"synapse.metrics.CPUMetrics": None,
"synapse.metrics.jemalloc.JemallocCollector": None,
"synapse.util.metrics.DynamicCollectorRegistry": None,
"synapse.metrics.background_process_metrics._Collector": None,
#
# `Metric` subclasses:
"prometheus_client.metrics_core.Metric": None,
"prometheus_client.metrics_core.UnknownMetricFamily": ArgLocation("labels", 3),
"prometheus_client.metrics_core.CounterMetricFamily": ArgLocation("labels", 3),
"prometheus_client.metrics_core.GaugeMetricFamily": ArgLocation("labels", 3),
"prometheus_client.metrics_core.SummaryMetricFamily": ArgLocation("labels", 3),
"prometheus_client.metrics_core.InfoMetricFamily": ArgLocation("labels", 3),
"prometheus_client.metrics_core.HistogramMetricFamily": ArgLocation("labels", 3),
"prometheus_client.metrics_core.GaugeHistogramMetricFamily": ArgLocation(
"labels", 4
),
"prometheus_client.metrics_core.StateSetMetricFamily": ArgLocation("labels", 3),
"synapse.metrics.GaugeHistogramMetricFamilyWithLabels": ArgLocation(
"labelnames", 4
),
}
"""
Map from the fullname of the Prometheus `Metric`/`Collector` classes to the keyword
argument name and positional index of the label names. This map is useful because
different metrics have different signatures for passing in label names and we just need
to know where to look.

This map should include any metrics that we collect with Prometheus. Which corresponds
to anything that inherits from `prometheus_client.registry.Collector`
(`synapse.metrics._types.Collector`) or `prometheus_client.metrics_core.Metric`. The
exhaustiveness of this list is enforced by `analyze_prometheus_metric_classes`.

The entries with `None` always fail the lint because they don't have a `labelnames`
argument (therefore, no `SERVER_NAME_LABEL`), but we include them here so that people
can notice and manually allow via a type ignore comment as the source of truth
should be in the source code.
"""

# Unbound at this point because we don't know the mypy version yet.
# This is set in the `plugin(...)` function below.
MypyZopePluginClass: Type[Plugin]


class SynapsePlugin(Plugin):
def __init__(self, options: Options):
super().__init__(options)
self.mypy_zope_plugin = MypyZopePluginClass(options)

def get_base_class_hook(
self, fullname: str
) -> Optional[Callable[[ClassDefContext], None]]:
def _get_base_class_hook(ctx: ClassDefContext) -> None:
# Run any `get_base_class_hook` checks from other plugins first.
#
# Unfortunately, because mypy only chooses the first plugin that returns a
# non-None value (known-limitation, c.f.
# https://github.com/python/mypy/issues/19524), we workaround this by
# putting our custom plugin first in the plugin order and then calling the
# other plugin's hook manually followed by our own checks.
if callback := self.mypy_zope_plugin.get_base_class_hook(fullname):
callback(ctx)

Comment thread
MadLittleMods marked this conversation as resolved.
# Now run our own checks
analyze_prometheus_metric_classes(ctx)

return _get_base_class_hook

def get_function_signature_hook(
self, fullname: str
) -> Optional[Callable[[FunctionSigContext], FunctionLike]]:
if fullname in (
"prometheus_client.metrics.Counter",
"prometheus_client.metrics.Histogram",
"prometheus_client.metrics.Gauge",
# TODO: Add other prometheus_client metrics that need checking as we
# refactor, see https://github.com/element-hq/synapse/issues/18592
):
return check_prometheus_metric_instantiation
# Strip off the unique identifier for classes that are dynamically created inside
# functions. ex. `synapse.metrics.jemalloc.JemallocCollector@185` (this is the line
# number)
if "@" in fullname:
fullname = fullname.split("@", 1)[0]

# Look for any Prometheus metrics to make sure they have the `SERVER_NAME_LABEL`
# label.
if fullname in prometheus_metric_fullname_to_label_arg_map.keys():
# Because it's difficult to determine the `fullname` of the function in the
# callback, let's just pass it in while we have it.
return lambda ctx: check_prometheus_metric_instantiation(ctx, fullname)

return None

Expand All @@ -90,7 +211,44 @@ def get_method_signature_hook(
return None


def check_prometheus_metric_instantiation(ctx: FunctionSigContext) -> CallableType:
def analyze_prometheus_metric_classes(ctx: ClassDefContext) -> None:
"""
Cross-check the list of Prometheus metric classes against the
`prometheus_metric_fullname_to_label_arg_map` to ensure the list is exhaustive and
up-to-date.
"""

fullname = ctx.cls.fullname
# Strip off the unique identifier for classes that are dynamically created inside
# functions. ex. `synapse.metrics.jemalloc.JemallocCollector@185` (this is the line
# number)
if "@" in fullname:
fullname = fullname.split("@", 1)[0]

if any(
ancestor_type.fullname
in (
# All of the Prometheus metric classes inherit from the `Collector`.
"prometheus_client.registry.Collector",
"synapse.metrics._types.Collector",
# And custom metrics that inherit from `Metric`.
"prometheus_client.metrics_core.Metric",
)
for ancestor_type in ctx.cls.info.mro
):
if fullname not in prometheus_metric_fullname_to_label_arg_map:
ctx.api.fail(
f"Expected {fullname} to be in `prometheus_metric_fullname_to_label_arg_map`, "
f"but it was not found. This is a problem with our custom mypy plugin. "
f"Please add it to the map.",
Context(),
code=PROMETHEUS_METRIC_MISSING_FROM_LIST_TO_CHECK,
)


def check_prometheus_metric_instantiation(
ctx: FunctionSigContext, fullname: str
) -> CallableType:
"""
Ensure that the `prometheus_client` metrics include the `SERVER_NAME_LABEL` label
when instantiated.
Expand All @@ -103,18 +261,49 @@ def check_prometheus_metric_instantiation(ctx: FunctionSigContext) -> CallableTy
Python garbage collection, and Twisted reactor tick time, which shouldn't have the
`SERVER_NAME_LABEL`. In those cases, use a type ignore comment to disable the
check, e.g. `# type: ignore[missing-server-name-label]`.

Args:
ctx: The `FunctionSigContext` from mypy.
fullname: The fully qualified name of the function being called,
e.g. `"prometheus_client.metrics.Counter"`
"""
# The true signature, this isn't being modified so this is what will be returned.
signature: CallableType = ctx.default_signature
signature = ctx.default_signature

# Find where the label names argument is in the function signature.
arg_location = prometheus_metric_fullname_to_label_arg_map.get(
fullname, Sentinel.UNSET_SENTINEL
)
assert arg_location is not Sentinel.UNSET_SENTINEL, (
f"Expected to find {fullname} in `prometheus_metric_fullname_to_label_arg_map`, "
f"but it was not found. This is a problem with our custom mypy plugin. "
f"Please add it to the map. Context: {ctx.context}"
)
# People should be using `# type: ignore[missing-server-name-label]` for
# process-level metrics that should not have the `SERVER_NAME_LABEL`.
if arg_location is None:
ctx.api.fail(
f"{signature.name} does not have a `labelnames`/`labels` argument "
"(if this is untrue, update `prometheus_metric_fullname_to_label_arg_map` "
"in our custom mypy plugin) and should probably have a type ignore comment, "
"e.g. `# type: ignore[missing-server-name-label]`. The reason we don't "
"automatically ignore this is the source of truth should be in the source code.",
ctx.context,
code=PROMETHEUS_METRIC_MISSING_SERVER_NAME_LABEL,
)
return signature

# Sanity check the arguments are still as expected in this version of
# `prometheus_client`. ex. `Counter(name, documentation, labelnames, ...)`
#
# `signature.arg_names` should be: ["name", "documentation", "labelnames", ...]
if len(signature.arg_names) < 3 or signature.arg_names[2] != "labelnames":
if (
len(signature.arg_names) < (arg_location.position + 1)
or signature.arg_names[arg_location.position] != arg_location.keyword_name
):
ctx.api.fail(
f"Expected the 3rd argument of {signature.name} to be 'labelnames', but got "
f"{signature.arg_names[2]}",
f"Expected argument number {arg_location.position + 1} of {signature.name} to be `labelnames`/`labels`, "
f"but got {signature.arg_names[arg_location.position]}",
ctx.context,
)
return signature
Expand All @@ -137,7 +326,11 @@ def check_prometheus_metric_instantiation(ctx: FunctionSigContext) -> CallableTy
# ...
# ]
# ```
labelnames_arg_expression = ctx.args[2][0] if len(ctx.args[2]) > 0 else None
labelnames_arg_expression = (
ctx.args[arg_location.position][0]
if len(ctx.args[arg_location.position]) > 0
else None
)
if isinstance(labelnames_arg_expression, (ListExpr, TupleExpr)):
# Check if the `labelnames` argument includes the `server_name` label (`SERVER_NAME_LABEL`).
for labelname_expression in labelnames_arg_expression.items:
Expand Down Expand Up @@ -476,10 +669,12 @@ def is_cacheable(


def plugin(version: str) -> Type[SynapsePlugin]:
global MypyZopePluginClass
# This is the entry point of the plugin, and lets us deal with the fact
# that the mypy plugin interface is *not* stable by looking at the version
# string.
#
# However, since we pin the version of mypy Synapse uses in CI, we don't
# really care.
MypyZopePluginClass = mypy_zope_plugin(version)
return SynapsePlugin
25 changes: 18 additions & 7 deletions synapse/metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,9 @@ class LaterGauge(Collector):
]

def collect(self) -> Iterable[Metric]:
g = GaugeMetricFamily(self.name, self.desc, labels=self.labelnames)
# The decision to add `SERVER_NAME_LABEL` is from the `LaterGauge` usage itself
# (we don't enforce it here, one level up).
g = GaugeMetricFamily(self.name, self.desc, labels=self.labelnames) # type: ignore[missing-server-name-label]

try:
calls = self.caller()
Expand Down Expand Up @@ -304,7 +306,9 @@ def collect(self) -> Iterable[Metric]:

Note: may be called by a separate thread.
"""
in_flight = GaugeMetricFamily(
# The decision to add `SERVER_NAME_LABEL` is from the `GaugeBucketCollector`
# usage itself (we don't enforce it here, one level up).
in_flight = GaugeMetricFamily( # type: ignore[missing-server-name-label]
self.name + "_total", self.desc, labels=self.labels
)

Expand All @@ -328,7 +332,9 @@ def collect(self) -> Iterable[Metric]:
yield in_flight

for name in self.sub_metrics:
gauge = GaugeMetricFamily(
# The decision to add `SERVER_NAME_LABEL` is from the `InFlightGauge` usage
# itself (we don't enforce it here, one level up).
gauge = GaugeMetricFamily( # type: ignore[missing-server-name-label]
"_".join([self.name, name]), "", labels=self.labels
)
for key, metrics in metrics_by_key.items():
Expand Down Expand Up @@ -483,7 +489,9 @@ def _values_to_metric(
# that bucket or below.
accumulated_values = itertools.accumulate(bucket_values)

return GaugeHistogramMetricFamilyWithLabels(
# The decision to add `SERVER_NAME_LABEL` is from the `GaugeBucketCollector`
# usage itself (we don't enforce it here, one level up).
return GaugeHistogramMetricFamilyWithLabels( # type: ignore[missing-server-name-label]
name=self._name,
documentation=self._documentation,
labelnames=self._labelnames,
Expand Down Expand Up @@ -519,16 +527,19 @@ def collect(self) -> Iterable[Metric]:
line = s.read()
raw_stats = line.split(") ", 1)[1].split(" ")

user = GaugeMetricFamily("process_cpu_user_seconds_total", "")
# This is a process-level metric, so it does not have the `SERVER_NAME_LABEL`.
user = GaugeMetricFamily("process_cpu_user_seconds_total", "") # type: ignore[missing-server-name-label]
user.add_metric([], float(raw_stats[11]) / self.ticks_per_sec)
yield user

sys = GaugeMetricFamily("process_cpu_system_seconds_total", "")
# This is a process-level metric, so it does not have the `SERVER_NAME_LABEL`.
sys = GaugeMetricFamily("process_cpu_system_seconds_total", "") # type: ignore[missing-server-name-label]
sys.add_metric([], float(raw_stats[12]) / self.ticks_per_sec)
yield sys


REGISTRY.register(CPUMetrics())
# This is a process-level metric, so it does not have the `SERVER_NAME_LABEL`.
REGISTRY.register(CPUMetrics()) # type: ignore[missing-server-name-label]


#
Expand Down
Loading
Loading