-
Notifications
You must be signed in to change notification settings - Fork 522
Refactor cache metrics to be homeserver-scoped #18604
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
ba3bbbb
749b7a4
8e71fcd
61fc9ba
74610aa
8dbca87
1e57b57
19c917c
0453666
a17206f
4fcfda0
d10d862
9895b3b
9eae037
ee91f6b
1917a0b
64ed156
e943bb1
b3ecd5c
f7e6f09
9078076
42699c4
22cee6f
f5d9558
2b13387
71fbcf6
6676f05
0bb7eae
b5f00c8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -66,6 +66,17 @@ | |||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||
| HAVE_PROC_SELF_STAT = os.path.exists("/proc/self/stat") | ||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||
| INSTANCE_LABEL_NAME = "instance" | ||||||||||||||||||||||||||||||||||||||
| """ | ||||||||||||||||||||||||||||||||||||||
| The standard Prometheus label name used to identify which server instance the metrics | ||||||||||||||||||||||||||||||||||||||
| came from. | ||||||||||||||||||||||||||||||||||||||
| In the case of a Synapse homeserver, this should be set to the homeserver name | ||||||||||||||||||||||||||||||||||||||
| (`hs.hostname`). | ||||||||||||||||||||||||||||||||||||||
| Normally, this would be set automatically by the Prometheus server scraping the data but | ||||||||||||||||||||||||||||||||||||||
| since we support multiple instances of Synapse running in the same process and all | ||||||||||||||||||||||||||||||||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. absolute nit, readability:
Suggested change
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. That is from an old version of the diff. This is the latest: synapse/synapse/metrics/__init__.py Lines 69 to 82 in fc10a5e
|
||||||||||||||||||||||||||||||||||||||
| metrics are in a single global `REGISTRY`, we need to manually label any metrics. | ||||||||||||||||||||||||||||||||||||||
| """ | ||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||
| class _RegistryProxy: | ||||||||||||||||||||||||||||||||||||||
| @staticmethod | ||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -31,6 +31,7 @@ | |
| from prometheus_client.core import Gauge | ||
|
|
||
| from synapse.config.cache import add_resizable_cache | ||
| from synapse.metrics import INSTANCE_LABEL_NAME | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The metrics being refactored to be homeserver-scoped are in this file. The rest of the changes are to support that change and supply the |
||
| from synapse.util.metrics import DynamicCollectorRegistry | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
@@ -46,50 +47,65 @@ | |
| caches_by_name: Dict[str, Sized] = {} | ||
|
|
||
| cache_size = Gauge( | ||
| "synapse_util_caches_cache_size", "", ["name"], registry=CACHE_METRIC_REGISTRY | ||
| "synapse_util_caches_cache_size", | ||
| "", | ||
| labelnames=["name", INSTANCE_LABEL_NAME], | ||
| registry=CACHE_METRIC_REGISTRY, | ||
| ) | ||
| cache_hits = Gauge( | ||
| "synapse_util_caches_cache_hits", "", ["name"], registry=CACHE_METRIC_REGISTRY | ||
| "synapse_util_caches_cache_hits", | ||
| "", | ||
| labelnames=["name", INSTANCE_LABEL_NAME], | ||
| registry=CACHE_METRIC_REGISTRY, | ||
| ) | ||
| cache_evicted = Gauge( | ||
| "synapse_util_caches_cache_evicted_size", | ||
| "", | ||
| ["name", "reason"], | ||
| labelnames=["name", "reason", INSTANCE_LABEL_NAME], | ||
| registry=CACHE_METRIC_REGISTRY, | ||
| ) | ||
| cache_total = Gauge( | ||
| "synapse_util_caches_cache", "", ["name"], registry=CACHE_METRIC_REGISTRY | ||
| "synapse_util_caches_cache", | ||
| "", | ||
| labelnames=["name", INSTANCE_LABEL_NAME], | ||
| registry=CACHE_METRIC_REGISTRY, | ||
| ) | ||
| cache_max_size = Gauge( | ||
| "synapse_util_caches_cache_max_size", "", ["name"], registry=CACHE_METRIC_REGISTRY | ||
| "synapse_util_caches_cache_max_size", | ||
| "", | ||
| labelnames=["name", INSTANCE_LABEL_NAME], | ||
| registry=CACHE_METRIC_REGISTRY, | ||
| ) | ||
| cache_memory_usage = Gauge( | ||
| "synapse_util_caches_cache_size_bytes", | ||
| "Estimated memory usage of the caches", | ||
| ["name"], | ||
| labelnames=["name", INSTANCE_LABEL_NAME], | ||
| registry=CACHE_METRIC_REGISTRY, | ||
| ) | ||
|
|
||
| response_cache_size = Gauge( | ||
| "synapse_util_caches_response_cache_size", | ||
| "", | ||
| ["name"], | ||
| labelnames=["name", INSTANCE_LABEL_NAME], | ||
| registry=CACHE_METRIC_REGISTRY, | ||
| ) | ||
| response_cache_hits = Gauge( | ||
| "synapse_util_caches_response_cache_hits", | ||
| "", | ||
| ["name"], | ||
| labelnames=["name", INSTANCE_LABEL_NAME], | ||
| registry=CACHE_METRIC_REGISTRY, | ||
| ) | ||
| response_cache_evicted = Gauge( | ||
| "synapse_util_caches_response_cache_evicted_size", | ||
| "", | ||
| ["name", "reason"], | ||
| labelnames=["name", "reason", INSTANCE_LABEL_NAME], | ||
| registry=CACHE_METRIC_REGISTRY, | ||
| ) | ||
| response_cache_total = Gauge( | ||
| "synapse_util_caches_response_cache", "", ["name"], registry=CACHE_METRIC_REGISTRY | ||
| "synapse_util_caches_response_cache", | ||
| "", | ||
| labelnames=["name", INSTANCE_LABEL_NAME], | ||
| registry=CACHE_METRIC_REGISTRY, | ||
| ) | ||
|
|
||
|
|
||
|
|
@@ -103,12 +119,13 @@ class EvictionReason(Enum): | |
| invalidation = auto() | ||
|
|
||
|
|
||
| @attr.s(slots=True, auto_attribs=True) | ||
| @attr.s(slots=True, auto_attribs=True, kw_only=True) | ||
| class CacheMetric: | ||
| _cache: Sized | ||
| _cache_type: str | ||
| _cache_name: str | ||
| _collect_callback: Optional[Callable] | ||
| _server_name: str | ||
|
|
||
| hits: int = 0 | ||
| misses: int = 0 | ||
|
|
@@ -145,34 +162,34 @@ def describe(self) -> List[str]: | |
|
|
||
| def collect(self) -> None: | ||
| try: | ||
| labels_base = { | ||
| "name": self._cache_name, | ||
| INSTANCE_LABEL_NAME: self._server_name, | ||
| } | ||
| if self._cache_type == "response_cache": | ||
| response_cache_size.labels(self._cache_name).set(len(self._cache)) | ||
| response_cache_hits.labels(self._cache_name).set(self.hits) | ||
| response_cache_size.labels(**labels_base).set(len(self._cache)) | ||
| response_cache_hits.labels(**labels_base).set(self.hits) | ||
| for reason in EvictionReason: | ||
| response_cache_evicted.labels(self._cache_name, reason.name).set( | ||
| self.eviction_size_by_reason[reason] | ||
| ) | ||
| response_cache_total.labels(self._cache_name).set( | ||
| self.hits + self.misses | ||
| ) | ||
| response_cache_evicted.labels( | ||
| {**labels_base, "reason": reason.name} | ||
| ).set(self.eviction_size_by_reason[reason]) | ||
| response_cache_total.labels(**labels_base).set(self.hits + self.misses) | ||
| else: | ||
| cache_size.labels(self._cache_name).set(len(self._cache)) | ||
| cache_hits.labels(self._cache_name).set(self.hits) | ||
| cache_size.labels(**labels_base).set(len(self._cache)) | ||
| cache_hits.labels(**labels_base).set(self.hits) | ||
| for reason in EvictionReason: | ||
| cache_evicted.labels(self._cache_name, reason.name).set( | ||
| cache_evicted.labels({**labels_base, "reason": reason.name}).set( | ||
| self.eviction_size_by_reason[reason] | ||
| ) | ||
| cache_total.labels(self._cache_name).set(self.hits + self.misses) | ||
| cache_total.labels(**labels_base).set(self.hits + self.misses) | ||
| max_size = getattr(self._cache, "max_size", None) | ||
| if max_size: | ||
| cache_max_size.labels(self._cache_name).set(max_size) | ||
| cache_max_size.labels(**labels_base).set(max_size) | ||
|
|
||
| if TRACK_MEMORY_USAGE: | ||
| # self.memory_usage can be None if nothing has been inserted | ||
| # into the cache yet. | ||
| cache_memory_usage.labels(self._cache_name).set( | ||
| self.memory_usage or 0 | ||
| ) | ||
| cache_memory_usage.labels(**labels_base).set(self.memory_usage or 0) | ||
| if self._collect_callback: | ||
| self._collect_callback() | ||
| except Exception as e: | ||
|
|
@@ -181,9 +198,11 @@ def collect(self) -> None: | |
|
|
||
|
|
||
| def register_cache( | ||
| *, | ||
| cache_type: str, | ||
| cache_name: str, | ||
| cache: Sized, | ||
| server_name: str, | ||
| collect_callback: Optional[Callable] = None, | ||
| resizable: bool = True, | ||
| resize_callback: Optional[Callable] = None, | ||
|
|
@@ -196,6 +215,8 @@ def register_cache( | |
| cache_name: name of the cache | ||
| cache: cache itself, which must implement __len__(), and may optionally implement | ||
| a max_size property | ||
| server_name: The homeserver name that this cache is associated with | ||
| (used to label the metric) (`hs.hostname`). | ||
| collect_callback: If given, a function which is called during metric | ||
| collection to update additional metrics. | ||
| resizable: Whether this cache supports being resized, in which case either | ||
|
|
@@ -210,7 +231,13 @@ def register_cache( | |
| resize_callback = cache.set_cache_factor # type: ignore | ||
| add_resizable_cache(cache_name, resize_callback) | ||
|
|
||
| metric = CacheMetric(cache, cache_type, cache_name, collect_callback) | ||
| metric = CacheMetric( | ||
| cache=cache, | ||
| cache_type=cache_type, | ||
| cache_name=cache_name, | ||
| server_name=server_name, | ||
| collect_callback=collect_callback, | ||
| ) | ||
| metric_name = "cache_%s_%s" % (cache_type, cache_name) | ||
| caches_by_name[cache_name] = cache | ||
| CACHE_METRIC_REGISTRY.register_hook(metric_name, metric.collect) | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.