Skip to content

Commit 6284985

Browse files
add SynapseHistogram
1 parent 49eb3cc commit 6284985

13 files changed

Lines changed: 196 additions & 56 deletions

File tree

synapse/api/auth/__init__.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,11 @@
2020
#
2121
from typing import TYPE_CHECKING, Optional, Protocol
2222

23-
from prometheus_client import Histogram
24-
2523
from twisted.web.server import Request
2624

2725
from synapse.appservice import ApplicationService
2826
from synapse.http.site import SynapseRequest
29-
from synapse.metrics import SERVER_NAME_LABEL
27+
from synapse.metrics import SERVER_NAME_LABEL, SynapseHistogram
3028
from synapse.types import Requester
3129

3230
if TYPE_CHECKING:
@@ -36,7 +34,7 @@
3634
GUEST_DEVICE_ID = "guest_device"
3735

3836

39-
introspection_response_timer = Histogram(
37+
introspection_response_timer = SynapseHistogram(
4038
"synapse_api_auth_delegated_introspection_response",
4139
"Time taken to get a response for an introspection request",
4240
labelnames=["code", SERVER_NAME_LABEL],

synapse/federation/federation_server.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,6 @@
3232
Union,
3333
)
3434

35-
from prometheus_client import Histogram
36-
3735
from twisted.python import failure
3836

3937
from synapse.api.constants import (
@@ -79,7 +77,12 @@
7977
tag_args,
8078
trace,
8179
)
82-
from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter, SynapseGauge
80+
from synapse.metrics import (
81+
SERVER_NAME_LABEL,
82+
SynapseCounter,
83+
SynapseGauge,
84+
SynapseHistogram,
85+
)
8386
from synapse.metrics.background_process_metrics import wrap_as_background_process
8487
from synapse.replication.http.federation import (
8588
ReplicationFederationSendEduRestServlet,
@@ -116,7 +119,7 @@
116119
labelnames=["type", SERVER_NAME_LABEL],
117120
)
118121

119-
pdu_process_time = Histogram(
122+
pdu_process_time = SynapseHistogram(
120123
"synapse_federation_server_pdu_process_time",
121124
"Time taken to process an event",
122125
labelnames=[SERVER_NAME_LABEL],

synapse/handlers/federation.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@
3636
)
3737

3838
import attr
39-
from prometheus_client import Histogram
4039
from signedjson.key import decode_verify_key_bytes
4140
from signedjson.sign import verify_signed_json
4241
from unpaddedbase64 import decode_base64
@@ -67,7 +66,7 @@
6766
from synapse.http.servlet import assert_params_in_dict
6867
from synapse.logging.context import nested_logging_context
6968
from synapse.logging.opentracing import SynapseTags, set_tag, tag_args, trace
70-
from synapse.metrics import SERVER_NAME_LABEL
69+
from synapse.metrics import SERVER_NAME_LABEL, SynapseHistogram
7170
from synapse.module_api import NOT_SPAM
7271
from synapse.storage.databases.main.events_worker import EventRedactBehaviour
7372
from synapse.storage.invite_rule import InviteRule
@@ -83,7 +82,7 @@
8382
logger = logging.getLogger(__name__)
8483

8584
# Added to debug performance and track progress on optimizations
86-
backfill_processing_before_timer = Histogram(
85+
backfill_processing_before_timer = SynapseHistogram(
8786
"synapse_federation_backfill_processing_before_time_seconds",
8887
"sec",
8988
labelnames=[SERVER_NAME_LABEL],
@@ -101,7 +100,6 @@
101100
40.0,
102101
60.0,
103102
80.0,
104-
"+Inf",
105103
),
106104
)
107105

synapse/handlers/federation_event.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,6 @@
3232
Sequence,
3333
)
3434

35-
from prometheus_client import Histogram
36-
3735
from synapse import event_auth
3836
from synapse.api.constants import (
3937
EventContentFields,
@@ -76,7 +74,7 @@
7674
tag_args,
7775
trace,
7876
)
79-
from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter
77+
from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter, SynapseHistogram
8078
from synapse.replication.http.federation import (
8179
ReplicationFederationSendEventsRestServlet,
8280
)
@@ -109,7 +107,7 @@
109107
)
110108

111109
# Added to debug performance and track progress on optimizations
112-
backfill_processing_after_timer = Histogram(
110+
backfill_processing_after_timer = SynapseHistogram(
113111
"synapse_federation_backfill_processing_after_time_seconds",
114112
"sec",
115113
labelnames=[SERVER_NAME_LABEL],
@@ -134,7 +132,6 @@
134132
120.0,
135133
150.0,
136134
180.0,
137-
"+Inf",
138135
),
139136
)
140137

synapse/handlers/sliding_sync/__init__.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
from itertools import chain
1818
from typing import TYPE_CHECKING, AbstractSet, Mapping, Optional
1919

20-
from prometheus_client import Histogram
2120
from typing_extensions import assert_never
2221

2322
from synapse.api.constants import Direction, EventTypes, Membership
@@ -38,7 +37,7 @@
3837
tag_args,
3938
trace,
4039
)
41-
from synapse.metrics import SERVER_NAME_LABEL
40+
from synapse.metrics import SERVER_NAME_LABEL, SynapseHistogram
4241
from synapse.storage.databases.main.roommember import extract_heroes_from_room_summary
4342
from synapse.storage.databases.main.state_deltas import StateDelta
4443
from synapse.storage.databases.main.stream import PaginateFunction
@@ -77,7 +76,7 @@
7776
logger = logging.getLogger(__name__)
7877

7978

80-
sync_processing_time = Histogram(
79+
sync_processing_time = SynapseHistogram(
8180
"synapse_sliding_sync_processing_time",
8281
"Time taken to generate a sliding sync response, ignoring wait times.",
8382
labelnames=["initial", SERVER_NAME_LABEL],

synapse/http/request_metrics.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,13 @@
2424
import traceback
2525
from typing import Mapping
2626

27-
from prometheus_client.core import Histogram
28-
2927
from synapse.logging.context import current_context
30-
from synapse.metrics import SERVER_NAME_LABEL, LaterGauge, SynapseCounter
28+
from synapse.metrics import (
29+
SERVER_NAME_LABEL,
30+
LaterGauge,
31+
SynapseCounter,
32+
SynapseHistogram,
33+
)
3134

3235
logger = logging.getLogger(__name__)
3336

@@ -51,7 +54,7 @@
5154
labelnames=["method", "code", SERVER_NAME_LABEL],
5255
)
5356

54-
response_timer = Histogram(
57+
response_timer = SynapseHistogram(
5558
"synapse_http_server_response_time_seconds",
5659
"sec",
5760
labelnames=["method", "servlet", "tag", "code", SERVER_NAME_LABEL],

synapse/metrics/__init__.py

Lines changed: 145 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,17 +52,20 @@
5252
from packaging.version import parse as parse_version
5353
from prometheus_client import (
5454
CollectorRegistry,
55-
Histogram,
5655
Metric,
5756
generate_latest,
57+
values,
5858
)
59+
from prometheus_client.context_managers import Timer
5960
from prometheus_client.core import (
6061
REGISTRY,
6162
GaugeHistogramMetricFamily,
6263
GaugeMetricFamily,
6364
Sample,
6465
)
6566
from prometheus_client.metrics import _get_use_created
67+
from prometheus_client.samples import Exemplar
68+
from prometheus_client.utils import INF, floatToGoString
6669
from prometheus_client.values import ValueClass
6770
from typing_extensions import Dict, Self
6871

@@ -526,7 +529,7 @@ def __init__(
526529
self._gauge_value: float = 0
527530

528531
if not self._labelvalues and self._registry:
529-
# TODO: look into what to do here
532+
# TODO: look into what to do here, and maybe move it to the wrapperbase?
530533
self._registry.register(self) # type: ignore
531534

532535
def set(self, value: float) -> None:
@@ -596,6 +599,143 @@ def _metric_init(self) -> None:
596599
)
597600

598601

602+
class SynapseHistogram(SynapseMetricWrapperBase):
    """Histogram metric that tallies observations into configurable buckets.

    Each observation is added to a running sum and counted in the first bucket
    whose upper bound is greater than or equal to the observed amount.
    `_child_samples` renders the state as cumulative `_bucket` samples plus
    `_count`, `_sum` and (optionally) `_created` samples.

    An OpenTelemetry histogram instrument advertising the same finite bucket
    boundaries is created alongside and stored in `self._histogram`.
    """

    _type = "histogram"
    # "le" is attached to every bucket sample, so callers may not use it as a
    # label name of their own.
    _reserved_labelnames = ["le"]
    DEFAULT_BUCKETS = (
        0.005,
        0.01,
        0.025,
        0.05,
        0.075,
        0.1,
        0.25,
        0.5,
        0.75,
        1.0,
        2.5,
        5.0,
        7.5,
        10.0,
        INF,
    )

    def __init__(
        self,
        name: str,
        documentation: str,
        labelnames: Iterable[str] = (),
        namespace: str = "",
        subsystem: str = "",
        unit: str = "",
        registry: Optional[CollectorRegistry] = REGISTRY,
        _labelvalues: Optional[Sequence[str]] = None,
        buckets: Sequence[Union[float, str]] = DEFAULT_BUCKETS,
    ) -> None:
        """Create the histogram.

        Args:
            name: Metric name.
            documentation: Human readable description of the metric.
            labelnames: Names of the labels attached to each observation.
            namespace: Passed through to the base class.
            subsystem: Passed through to the base class.
            unit: Passed through to the base class.
            registry: Passed through to the base class.
            _labelvalues: Passed through to the base class.
            buckets: Upper bounds of the buckets, in ascending order. Entries
                are converted with `float()`; a trailing `+Inf` bucket is
                appended if missing.

        Raises:
            ValueError: If `buckets` is not sorted, or if fewer than two
                buckets remain once `+Inf` has been appended.
        """
        # Normalise the buckets before the base constructor runs.
        # NOTE(review): presumably the base constructor invokes
        # `_metric_init`, which reads `self._upper_bounds` - confirm.
        self._prepare_buckets(buckets)
        super().__init__(
            name=name,
            documentation=documentation,
            labelnames=labelnames,
            namespace=namespace,
            subsystem=subsystem,
            unit=unit,
            registry=registry,
            _labelvalues=_labelvalues,
        )
        # Hand OpenTelemetry the normalised boundaries rather than the raw
        # `buckets` argument: explicit boundaries must be plain finite floats,
        # and the overflow (+Inf) bucket is implicit on the OpenTelemetry
        # side, so it is left out here.
        self._histogram = meter.create_histogram(
            self._name,
            unit=self._unit,
            description=self._documentation,
            explicit_bucket_boundaries_advisory=[
                bound for bound in self._upper_bounds if bound != INF
            ],
        )
        # NOTE(review): presumably the base class reuses `_kwargs` when it
        # builds labelled children, so they share the same buckets - confirm.
        self._kwargs["buckets"] = buckets

    def _prepare_buckets(self, source_buckets: Sequence[Union[float, str]]) -> None:
        """Validate `source_buckets` and store them in `self._upper_bounds`.

        Raises:
            ValueError: If the buckets are not in ascending order, or if fewer
                than two buckets remain once `+Inf` has been appended.
        """
        buckets = [float(b) for b in source_buckets]
        if buckets != sorted(buckets):
            # This is probably an error on the part of the user,
            # so raise rather than sorting for them.
            raise ValueError("Buckets not in sorted order")
        if buckets and buckets[-1] != INF:
            buckets.append(INF)
        if len(buckets) < 2:
            raise ValueError("Must have at least two buckets")
        self._upper_bounds = buckets

    def _metric_init(self) -> None:
        """Create the value holders for the sum and for every bucket."""
        self._buckets: list[values.ValueClass] = []
        # `time` here is the module-level callable, not the `time` method
        # defined further down: method bodies do not see class-scope names.
        # NOTE(review): presumably `from time import time` - confirm.
        self._created = time()
        bucket_labelnames = self._labelnames + ("le",)
        self._sum = values.ValueClass(
            self._type,
            self._name,
            self._name + "_sum",
            self._labelnames,
            self._labelvalues,
            self._documentation,
        )
        for bound in self._upper_bounds:
            self._buckets.append(
                values.ValueClass(
                    self._type,
                    self._name,
                    self._name + "_bucket",
                    bucket_labelnames,
                    self._labelvalues + (floatToGoString(bound),),
                    self._documentation,
                )
            )

    def observe(self, amount: float, exemplar: Optional[Dict[str, str]] = None) -> None:
        """Observe the given amount.

        The amount is usually positive or zero. Negative values are
        accepted but prevent current versions of Prometheus from
        properly detecting counter resets in the sum of
        observations. See
        https://prometheus.io/docs/practices/histograms/#count-and-sum-of-observations
        for details.

        Args:
            amount: The value to record.
            exemplar: Optional exemplar labels to attach to the bucket that
                receives this observation.
        """
        # NOTE(review): nothing in this method records to `self._histogram`;
        # confirm whether that is intentional.
        self._raise_if_not_observable()
        self._sum.inc(amount)
        # Buckets are stored non-cumulatively: only the first bucket whose
        # upper bound admits the amount is incremented.
        for bound, bucket in zip(self._upper_bounds, self._buckets):
            if amount <= bound:
                bucket.inc(1)
                if exemplar:
                    # NOTE(review): the exemplar is stored without validation
                    # (a `_validate_exemplar` call was disabled here).
                    bucket.set_exemplar(Exemplar(exemplar, amount, time()))
                break

    def time(self) -> Timer:
        """Time a block of code or function, and observe the duration in seconds.

        Can be used as a function decorator or context manager.
        """
        return Timer(self, "observe")

    def _child_samples(self) -> Iterable[Sample]:
        """Build the samples describing the current state of this histogram.

        Returns:
            A tuple with one cumulative `_bucket` sample per upper bound, a
            `_count` sample, a `_sum` sample (only when the lowest bound is
            non-negative) and a `_created` sample (only when
            `_get_use_created()` is truthy).
        """
        samples = []
        # Running total: converts the per-bucket counts into the cumulative
        # counts reported under each "le" label.
        acc = 0.0
        for bound, bucket in zip(self._upper_bounds, self._buckets):
            acc += bucket.get()
            samples.append(
                Sample(
                    "_bucket",
                    {"le": floatToGoString(bound)},
                    acc,
                    None,
                    bucket.get_exemplar(),
                )
            )
        samples.append(Sample("_count", {}, acc, None, None))
        if self._upper_bounds[0] >= 0:
            samples.append(Sample("_sum", {}, self._sum.get(), None, None))
        if _get_use_created():
            samples.append(Sample("_created", {}, self._created, None, None))
        return tuple(samples)
737+
738+
599739
@attr.s(slots=True, hash=True, auto_attribs=True, kw_only=True)
600740
class LaterGauge(Collector):
601741
"""A Gauge which periodically calls a user-provided callback to produce metrics."""
@@ -1091,7 +1231,7 @@ def collect(self) -> Iterable[Metric]:
10911231
"synapse_event_processing_lag", "", labelnames=["name", SERVER_NAME_LABEL]
10921232
)
10931233

1094-
event_processing_lag_by_event = Histogram(
1234+
event_processing_lag_by_event = SynapseHistogram(
10951235
"synapse_event_processing_lag_by_event",
10961236
"Time between an event being persisted and it being queued up to be sent to the relevant remote servers",
10971237
labelnames=["name", SERVER_NAME_LABEL],
@@ -1119,7 +1259,7 @@ def collect(self) -> Iterable[Metric]:
11191259
)
11201260

11211261
# 3PID send info
1122-
threepid_send_requests = Histogram(
1262+
threepid_send_requests = SynapseHistogram(
11231263
"synapse_threepid_send_requests_with_tries",
11241264
documentation="Number of requests for a 3pid token by try count. Note if"
11251265
" there is a request with try count of 4, then there would have been one"
@@ -1219,4 +1359,5 @@ def render_GET(self, request: Request) -> bytes:
12191359
"install_gc_manager",
12201360
"SynapseCounter",
12211361
"SynapseGauge",
1362+
"SynapseHistogram",
12221363
]

synapse/replication/tcp/external_cache.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,9 @@
2222
import logging
2323
from typing import TYPE_CHECKING, Any, Optional
2424

25-
from prometheus_client import Histogram
26-
2725
from synapse.logging import opentracing
2826
from synapse.logging.context import make_deferred_yieldable
29-
from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter
27+
from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter, SynapseHistogram
3028
from synapse.util.json import json_decoder, json_encoder
3129

3230
if TYPE_CHECKING:
@@ -46,7 +44,7 @@
4644
labelnames=["cache_name", "hit", SERVER_NAME_LABEL],
4745
)
4846

49-
response_timer = Histogram(
47+
response_timer = SynapseHistogram(
5048
"synapse_external_cache_response_time_seconds",
5149
"Time taken to get a response from Redis for a cache get/set request",
5250
labelnames=["method", SERVER_NAME_LABEL],

0 commit comments

Comments
 (0)