Skip to content
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
4df927b
Add in base linting for metrics
MadLittleMods Jul 23, 2025
cec25dd
Support `labelnames` argument being a Tuple expression
MadLittleMods Jul 23, 2025
d62d564
Make label name arg position/name generic
MadLittleMods Jul 24, 2025
325caec
Update wording
MadLittleMods Jul 24, 2025
53bfbc6
Explain why `prometheus_client.metrics_core.Metric` isn't included here
MadLittleMods Jul 24, 2025
395b33b
Disambiguate
MadLittleMods Jul 24, 2025
7818657
Remove debug log
MadLittleMods Jul 24, 2025
6bb5fef
Fix lints in in lint
MadLittleMods Jul 24, 2025
b073bb1
Merge branch 'develop' into madlittlemods/18592-lint-metric
MadLittleMods Jul 25, 2025
6758742
Add cross-check for `prometheus_metric_fullname_to_label_arg_map`
MadLittleMods Jul 28, 2025
c53d34d
Ignore some process-level metrics
MadLittleMods Jul 28, 2025
07e79fe
Ignore process-level metrics
MadLittleMods Jul 28, 2025
4902046
Fix `JemallocCollector` edge case
MadLittleMods Jul 28, 2025
da38631
Support almost all metric types
MadLittleMods Jul 28, 2025
b46040a
Add some cross-links
MadLittleMods Jul 28, 2025
8d706ac
Lint `Metric` types
MadLittleMods Jul 28, 2025
478edc2
Ignore `Metric` usage where necessary
MadLittleMods Jul 28, 2025
62ce1e0
Add changelog
MadLittleMods Jul 28, 2025
9166d55
Remove irrelevant comment
MadLittleMods Jul 29, 2025
2d947a4
Beef up comment
MadLittleMods Jul 29, 2025
89076a4
Mark off TODO about `Metric`
MadLittleMods Jul 29, 2025
a90e19b
Merge branch 'develop' into madlittlemods/18592-lint-metric
MadLittleMods Jul 29, 2025
44e30cf
Merge branch 'develop' into madlittlemods/18592-lint-metric
MadLittleMods Jul 29, 2025
86a0bcd
Merge branch 'develop' into madlittlemods/18592-lint-metric
MadLittleMods Jul 29, 2025
ab5a756
Uncomment merged lints
MadLittleMods Jul 29, 2025
8c380c0
Merge branch 'develop' into madlittlemods/18592-lint-metric
MadLittleMods Jul 29, 2025
bec7bfb
We can now use the full list
MadLittleMods Jul 29, 2025
21ef4b4
Workaround mypy limitation only calling `get_base_class_hook` for one…
MadLittleMods Jul 31, 2025
61a6f39
More sound logic
MadLittleMods Jul 31, 2025
4965340
Use lint instead of assert
MadLittleMods Jul 31, 2025
a7573a7
Make sure pydantic mypy hooks are still being called
MadLittleMods Aug 1, 2025
d2efe1d
Add more explicit instructions
MadLittleMods Aug 1, 2025
f2f1b9e
Merge branch 'develop' into madlittlemods/18592-lint-metric
MadLittleMods Aug 1, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/18733.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Update metrics linting to be able to handle custom metrics.
224 changes: 214 additions & 10 deletions scripts-dev/mypy_synapse_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,16 @@
can crop up, e.g the cache descriptors.
"""

from typing import Callable, Optional, Tuple, Type, Union
import enum
from typing import Callable, Mapping, Optional, Tuple, Type, Union

import attr
import mypy.types
from mypy.erasetype import remove_instance_last_known_values
from mypy.errorcodes import ErrorCode
from mypy.nodes import ARG_NAMED_OPT, ListExpr, NameExpr, TempNode, TupleExpr, Var
from mypy.plugin import (
ClassDefContext,
FunctionLike,
FunctionSigContext,
MethodSigContext,
Expand All @@ -55,17 +58,149 @@
)


class Sentinel(enum.Enum):
# defining a sentinel in this way allows mypy to correctly handle the
# type of a dictionary lookup and subsequent type narrowing.
UNSET_SENTINEL = object()


@attr.s(auto_attribs=True)
class ArgLocation:
keyword_name: str
"""
The keyword argument name for this argument
"""
position: int
"""
The 0-based positional index of this argument
"""


prometheus_metric_fullname_to_label_arg_map: Mapping[str, Optional[ArgLocation]] = {
# `Collector` subclasses:
"prometheus_client.metrics.MetricWrapperBase": ArgLocation("labelnames", 2),
"prometheus_client.metrics.Counter": ArgLocation("labelnames", 2),
"prometheus_client.metrics.Histogram": ArgLocation("labelnames", 2),
"prometheus_client.metrics.Gauge": ArgLocation("labelnames", 2),
"prometheus_client.metrics.Summary": ArgLocation("labelnames", 2),
"prometheus_client.metrics.Info": ArgLocation("labelnames", 2),
"prometheus_client.metrics.Enum": ArgLocation("labelnames", 2),
"synapse.metrics.LaterGauge": ArgLocation("labelnames", 2),
"synapse.metrics.InFlightGauge": ArgLocation("labels", 2),
"synapse.metrics.GaugeBucketCollector": ArgLocation("labelnames", 2),
"prometheus_client.registry.Collector": None,
"prometheus_client.registry._EmptyCollector": None,
"prometheus_client.registry.CollectorRegistry": None,
"prometheus_client.process_collector.ProcessCollector": None,
"prometheus_client.platform_collector.PlatformCollector": None,
"prometheus_client.gc_collector.GCCollector": None,
"synapse.metrics._gc.GCCounts": None,
"synapse.metrics._gc.PyPyGCStats": None,
"synapse.metrics._reactor_metrics.ReactorLastSeenMetric": None,
"synapse.metrics.CPUMetrics": None,
"synapse.metrics.jemalloc.JemallocCollector": None,
"synapse.util.metrics.DynamicCollectorRegistry": None,
"synapse.metrics.background_process_metrics._Collector": None,
#
# `Metric` subclasses:
"prometheus_client.metrics_core.Metric": None,
"prometheus_client.metrics_core.UnknownMetricFamily": ArgLocation("labels", 3),
"prometheus_client.metrics_core.CounterMetricFamily": ArgLocation("labels", 3),
"prometheus_client.metrics_core.GaugeMetricFamily": ArgLocation("labels", 3),
"prometheus_client.metrics_core.SummaryMetricFamily": ArgLocation("labels", 3),
"prometheus_client.metrics_core.InfoMetricFamily": ArgLocation("labels", 3),
"prometheus_client.metrics_core.HistogramMetricFamily": ArgLocation("labels", 3),
"prometheus_client.metrics_core.GaugeHistogramMetricFamily": ArgLocation(
"labels", 4
),
"prometheus_client.metrics_core.StateSetMetricFamily": ArgLocation("labels", 3),
"synapse.metrics.GaugeHistogramMetricFamilyWithLabels": ArgLocation(
"labelnames", 4
),
}
"""
Map from the fullname of the Prometheus `Metric`/`Collector` classes to the keyword
argument name and positional index of the label names. This map is useful because
different metrics have different signatures for passing in label names and we just need
to know where to look.

This map should include any metrics that we collect with Prometheus. Which corresponds
to anything that inherits from `prometheus_client.registry.Collector`
(`synapse.metrics._types.Collector`) or `prometheus_client.metrics_core.Metric`. The
exhaustiveness of this list is enforced by `analyze_prometheus_metric_classes`.

The entries with `None` always fail the lint because they don't have a `labelnames`
argument (therefore, no `SERVER_NAME_LABEL`), but we include them here so that people
can notice and manually allow via a type ignore comment as the source of truth
should be in the source code.
"""


class SynapsePlugin(Plugin):
# FIXME: Unfortunately, because mypy only chooses the first plugin that returns a
# non-None value, this check doesn't get run during our normal mypy lint process
# because `mypy_zope` also uses the `get_base_class_hook` method. Currently, this
# only works if you comment out the `mypy_zope` plugin and squint through the output
# caused by those failures. Unaware of a suitable workaround to actually enforce
# this on a regulard basis and in CI. Related issue: https://github.com/python/mypy/issues/19524
Comment thread
MadLittleMods marked this conversation as resolved.
Outdated
Comment thread
MadLittleMods marked this conversation as resolved.
Outdated
def get_base_class_hook(
self, fullname: str
) -> Optional[Callable[[ClassDefContext], None]]:
return analyze_prometheus_metric_classes

def get_function_signature_hook(
self, fullname: str
) -> Optional[Callable[[FunctionSigContext], FunctionLike]]:
# Strip off the unique identifier for classes that are dynamically created inside
# functions. ex. `synapse.metrics.jemalloc.JemallocCollector@185` (this is the line
# number)
if "@" in fullname:
fullname = fullname.split("@", 1)[0]

# TODO: Use `prometheus_metric_fullname_to_label_arg_map.keys()` once we've
# updated all of the metrics, see
# https://github.com/element-hq/synapse/issues/18592
if fullname in (
"prometheus_client.metrics.MetricWrapperBase",
"prometheus_client.metrics.Counter",
# Tracked by https://github.com/element-hq/synapse/pull/18724
# "prometheus_client.metrics.Histogram",
"prometheus_client.metrics.Gauge",
# TODO: Add other prometheus_client metrics that need checking as we
# refactor, see https://github.com/element-hq/synapse/issues/18592
"prometheus_client.metrics.Summary",
"prometheus_client.metrics.Info",
"prometheus_client.metrics.Enum",
# Tracked by https://github.com/element-hq/synapse/pull/18714
# "synapse.metrics.LaterGauge",
"synapse.metrics.InFlightGauge",
# Tracked by https://github.com/element-hq/synapse/pull/18715
# "synapse.metrics.GaugeBucketCollector",
"prometheus_client.registry.Collector",
"prometheus_client.registry._EmptyCollector",
"prometheus_client.registry.CollectorRegistry",
"prometheus_client.process_collector.ProcessCollector",
"prometheus_client.platform_collector.PlatformCollector",
"prometheus_client.gc_collector.GCCollector",
"synapse.metrics._gc.GCCounts",
"synapse.metrics._gc.PyPyGCStats",
"synapse.metrics._reactor_metrics.ReactorLastSeenMetric",
"synapse.metrics.CPUMetrics",
"synapse.metrics.jemalloc.JemallocCollector",
"synapse.util.metrics.DynamicCollectorRegistry",
"synapse.metrics.background_process_metrics._Collector",
"prometheus_client.metrics_core.Metric",
"prometheus_client.metrics_core.UnknownMetricFamily",
"prometheus_client.metrics_core.CounterMetricFamily",
"prometheus_client.metrics_core.GaugeMetricFamily",
"prometheus_client.metrics_core.SummaryMetricFamily",
"prometheus_client.metrics_core.InfoMetricFamily",
"prometheus_client.metrics_core.HistogramMetricFamily",
"prometheus_client.metrics_core.GaugeHistogramMetricFamily",
"prometheus_client.metrics_core.StateSetMetricFamily",
"synapse.metrics.GaugeHistogramMetricFamilyWithLabels",
):
return check_prometheus_metric_instantiation
# Because it's difficult to determine the `fullname` of the function in the
# callback, let's just pass it in while we have it.
return lambda ctx: check_prometheus_metric_instantiation(ctx, fullname)

return None

Expand All @@ -89,7 +224,41 @@ def get_method_signature_hook(
return None


def check_prometheus_metric_instantiation(ctx: FunctionSigContext) -> CallableType:
def analyze_prometheus_metric_classes(ctx: ClassDefContext) -> None:
"""
Cross-check the list of Prometheus metric classes against the
`prometheus_metric_fullname_to_label_arg_map` to ensure the list is exhaustive and
up-to-date.
"""

fullname = ctx.cls.fullname
# Strip off the unique identifier for classes that are dynamically created inside
# functions. ex. `synapse.metrics.jemalloc.JemallocCollector@185` (this is the line
# number)
if "@" in fullname:
fullname = fullname.split("@", 1)[0]

if any(
ancestor_type.fullname
in (
# All of the Prometheus metric classes inherit from the `Collector`.
"prometheus_client.registry.Collector",
"synapse.metrics._types.Collector",
# And custom metrics that inherit from `Metric`.
"prometheus_client.metrics_core.Metric",
)
for ancestor_type in ctx.cls.info.mro
):
assert fullname in prometheus_metric_fullname_to_label_arg_map, (
f"Expected {fullname} to be in `prometheus_metric_fullname_to_label_arg_map`, "
f"but it was not found. This is a problem with our custom mypy plugin. "
f"Please add it to the map."
)


def check_prometheus_metric_instantiation(
ctx: FunctionSigContext, fullname: str
) -> CallableType:
"""
Ensure that the `prometheus_client` metrics include the `SERVER_NAME_LABEL` label
when instantiated.
Expand All @@ -102,18 +271,49 @@ def check_prometheus_metric_instantiation(ctx: FunctionSigContext) -> CallableTy
Python garbage collection, and Twisted reactor tick time, which shouldn't have the
`SERVER_NAME_LABEL`. In those cases, use a type ignore comment to disable the
check, e.g. `# type: ignore[missing-server-name-label]`.

Args:
ctx: The `FunctionSigContext` from mypy.
fullname: The fully qualified name of the function being called,
e.g. `"prometheus_client.metrics.Counter"`
"""
# The true signature, this isn't being modified so this is what will be returned.
signature: CallableType = ctx.default_signature
signature = ctx.default_signature

# Find where the label names argument is in the function signature.
arg_location = prometheus_metric_fullname_to_label_arg_map.get(
fullname, Sentinel.UNSET_SENTINEL
)
assert arg_location is not Sentinel.UNSET_SENTINEL, (
f"Expected to find {fullname} in `prometheus_metric_fullname_to_label_arg_map`, "
f"but it was not found. This is a problem with our custom mypy plugin. "
f"Please add it to the map. Context: {ctx.context}"
)
# People should be using `# type: ignore[missing-server-name-label]` for
# process-level metrics that should not have the `SERVER_NAME_LABEL`.
if arg_location is None:
ctx.api.fail(
f"{signature.name} does not have a `labelnames`/`labels` argument "
"(if this is untrue, update `prometheus_metric_fullname_to_label_arg_map` "
"in our custom mypy plugin) and should probably have a type ignore comment, "
"e.g. `# type: ignore[missing-server-name-label]`. The reason we don't "
"automatically ignore this is the source of truth should be in the source code.",
ctx.context,
code=PROMETHEUS_METRIC_MISSING_SERVER_NAME_LABEL,
)
return signature

# Sanity check the arguments are still as expected in this version of
# `prometheus_client`. ex. `Counter(name, documentation, labelnames, ...)`
#
# `signature.arg_names` should be: ["name", "documentation", "labelnames", ...]
if len(signature.arg_names) < 3 or signature.arg_names[2] != "labelnames":
if (
len(signature.arg_names) < (arg_location.position + 1)
or signature.arg_names[arg_location.position] != arg_location.keyword_name
):
ctx.api.fail(
f"Expected the 3rd argument of {signature.name} to be 'labelnames', but got "
f"{signature.arg_names[2]}",
f"Expected argument number {arg_location.position + 1} of {signature.name} to be `labelnames`/`labels`, "
f"but got {signature.arg_names[arg_location.position]}",
ctx.context,
)
return signature
Expand All @@ -136,7 +336,11 @@ def check_prometheus_metric_instantiation(ctx: FunctionSigContext) -> CallableTy
# ...
# ]
# ```
labelnames_arg_expression = ctx.args[2][0] if len(ctx.args[2]) > 0 else None
labelnames_arg_expression = (
ctx.args[arg_location.position][0]
if len(ctx.args[arg_location.position]) > 0
else None
)
if isinstance(labelnames_arg_expression, (ListExpr, TupleExpr)):
# Check if the `labelnames` argument includes the `server_name` label (`SERVER_NAME_LABEL`).
for labelname_expression in labelnames_arg_expression.items:
Expand Down
25 changes: 18 additions & 7 deletions synapse/metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,9 @@ class LaterGauge(Collector):
]

def collect(self) -> Iterable[Metric]:
g = GaugeMetricFamily(self.name, self.desc, labels=self.labels)
# The decision to add `SERVER_NAME_LABEL` is from the `LaterGauge` usage itself
# (we don't enforce it here, one level up).
g = GaugeMetricFamily(self.name, self.desc, labels=self.labels) # type: ignore[missing-server-name-label]

try:
calls = self.caller()
Expand Down Expand Up @@ -303,7 +305,9 @@ def collect(self) -> Iterable[Metric]:

Note: may be called by a separate thread.
"""
in_flight = GaugeMetricFamily(
# The decision to add `SERVER_NAME_LABEL` is from the `GaugeBucketCollector`
# usage itself (we don't enforce it here, one level up).
in_flight = GaugeMetricFamily( # type: ignore[missing-server-name-label]
self.name + "_total", self.desc, labels=self.labels
)

Expand All @@ -327,7 +331,9 @@ def collect(self) -> Iterable[Metric]:
yield in_flight

for name in self.sub_metrics:
gauge = GaugeMetricFamily(
# The decision to add `SERVER_NAME_LABEL` is from the `InFlightGauge` usage
# itself (we don't enforce it here, one level up).
gauge = GaugeMetricFamily( # type: ignore[missing-server-name-label]
"_".join([self.name, name]), "", labels=self.labels
)
for key, metrics in metrics_by_key.items():
Expand Down Expand Up @@ -422,7 +428,9 @@ def _values_to_metric(self, values: Iterable[float]) -> GaugeHistogramMetricFami
# that bucket or below.
accumulated_values = itertools.accumulate(bucket_values)

return GaugeHistogramMetricFamily(
# The decision to add `SERVER_NAME_LABEL` is from the `GaugeBucketCollector`
# usage itself (we don't enforce it here, one level up).
return GaugeHistogramMetricFamily( # type: ignore[missing-server-name-label]
self._name,
self._documentation,
buckets=list(
Expand Down Expand Up @@ -456,16 +464,19 @@ def collect(self) -> Iterable[Metric]:
line = s.read()
raw_stats = line.split(") ", 1)[1].split(" ")

user = GaugeMetricFamily("process_cpu_user_seconds_total", "")
# This is a process-level metric, so it does not have the `SERVER_NAME_LABEL`.
user = GaugeMetricFamily("process_cpu_user_seconds_total", "") # type: ignore[missing-server-name-label]
user.add_metric([], float(raw_stats[11]) / self.ticks_per_sec)
yield user

sys = GaugeMetricFamily("process_cpu_system_seconds_total", "")
# This is a process-level metric, so it does not have the `SERVER_NAME_LABEL`.
sys = GaugeMetricFamily("process_cpu_system_seconds_total", "") # type: ignore[missing-server-name-label]
sys.add_metric([], float(raw_stats[12]) / self.ticks_per_sec)
yield sys


REGISTRY.register(CPUMetrics())
# This is a process-level metric, so it does not have the `SERVER_NAME_LABEL`.
REGISTRY.register(CPUMetrics()) # type: ignore[missing-server-name-label]


#
Expand Down
15 changes: 10 additions & 5 deletions synapse/metrics/_gc.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@

class GCCounts(Collector):
def collect(self) -> Iterable[Metric]:
cm = GaugeMetricFamily("python_gc_counts", "GC object counts", labels=["gen"])
# This is a process-level metric, so it does not have the `SERVER_NAME_LABEL`.
cm = GaugeMetricFamily("python_gc_counts", "GC object counts", labels=["gen"]) # type: ignore[missing-server-name-label]
for n, m in enumerate(gc.get_count()):
cm.add_metric([str(n)], m)

Expand All @@ -102,7 +103,8 @@ def install_gc_manager() -> None:
if running_on_pypy:
return

REGISTRY.register(GCCounts())
# This is a process-level metric, so it does not have the `SERVER_NAME_LABEL`.
REGISTRY.register(GCCounts()) # type: ignore[missing-server-name-label]

gc.disable()

Expand Down Expand Up @@ -177,15 +179,17 @@ def collect(self) -> Iterable[Metric]:
#
# Total time spent in GC: 0.073 # s.total_gc_time

pypy_gc_time = CounterMetricFamily(
# This is a process-level metric, so it does not have the `SERVER_NAME_LABEL`.
pypy_gc_time = CounterMetricFamily( # type: ignore[missing-server-name-label]
"pypy_gc_time_seconds_total",
"Total time spent in PyPy GC",
labels=[],
)
pypy_gc_time.add_metric([], s.total_gc_time / 1000)
yield pypy_gc_time

pypy_mem = GaugeMetricFamily(
# This is a process-level metric, so it does not have the `SERVER_NAME_LABEL`.
pypy_mem = GaugeMetricFamily( # type: ignore[missing-server-name-label]
"pypy_memory_bytes",
"Memory tracked by PyPy allocator",
labels=["state", "class", "kind"],
Expand All @@ -209,4 +213,5 @@ def collect(self) -> Iterable[Metric]:


if running_on_pypy:
REGISTRY.register(PyPyGCStats())
# This is a process-level metric, so it does not have the `SERVER_NAME_LABEL`.
REGISTRY.register(PyPyGCStats()) # type: ignore[missing-server-name-label]
Loading
Loading