|
18 | 18 | # [This file includes modifications made by New Vector Limited] |
19 | 19 | # |
20 | 20 | # |
21 | | -from typing import Dict, Protocol, Tuple |
| 21 | +from typing import Dict, NoReturn, Protocol, Tuple |
22 | 22 |
|
23 | 23 | from prometheus_client.core import Sample |
24 | 24 |
|
|
27 | 27 | SERVER_NAME_LABEL, |
28 | 28 | InFlightGauge, |
29 | 29 | LaterGauge, |
| 30 | + all_later_gauges_to_clean_up_on_shutdown, |
30 | 31 | generate_latest, |
31 | 32 | ) |
32 | 33 | from synapse.util.caches.deferred_cache import DeferredCache |
@@ -292,42 +293,91 @@ def test_cache_metric_multiple_servers(self) -> None: |
292 | 293 |
|
293 | 294 |
|
294 | 295 | class LaterGaugeTests(unittest.HomeserverTestCase): |
| 296 | + def setUp(self) -> None: |
| 297 | + super().setUp() |
| 298 | + self.later_gauge = LaterGauge( |
| 299 | + name="foo", |
| 300 | + desc="", |
| 301 | + labelnames=[SERVER_NAME_LABEL], |
| 302 | + ) |
| 303 | + |
| 304 | + def tearDown(self) -> None: |
| 305 | + super().tearDown() |
| 306 | + |
| 307 | + REGISTRY.unregister(self.later_gauge) |
| 308 | + all_later_gauges_to_clean_up_on_shutdown.pop(self.later_gauge.name, None) |
| 309 | + |
295 | 310 | def test_later_gauge_multiple_servers(self) -> None: |
296 | 311 | """ |
297 | 312 | Test that LaterGauge metrics are reported correctly across multiple servers. We |
297 | 312 | will have a metrics entry for each homeserver that is labeled with the |
299 | 314 | `server_name` label. |
300 | 315 | """ |
301 | | - later_gauge = LaterGauge( |
302 | | - name="foo", |
303 | | - desc="", |
304 | | - labelnames=[SERVER_NAME_LABEL], |
305 | | - ) |
306 | | - later_gauge.register_hook( |
| 316 | + self.later_gauge.register_hook( |
307 | 317 | homeserver_instance_id="123", hook=lambda: {("hs1",): 1} |
308 | 318 | ) |
309 | | - later_gauge.register_hook( |
| 319 | + self.later_gauge.register_hook( |
310 | 320 | homeserver_instance_id="456", hook=lambda: {("hs2",): 2} |
311 | 321 | ) |
312 | 322 |
|
313 | 323 | metrics_map = get_latest_metrics() |
314 | 324 |
|
315 | | - # Find the metrics for the caches from both homeservers |
| 325 | + # Find the metrics from both homeservers |
316 | 326 | hs1_metric = 'foo{server_name="hs1"}' |
317 | 327 | hs1_metric_value = metrics_map.get(hs1_metric) |
318 | 328 | self.assertIsNotNone( |
319 | 329 | hs1_metric_value, |
320 | | - f"Missing metric {hs1_metric} in cache metrics {metrics_map}", |
| 330 | + f"Missing metric {hs1_metric} in metrics {metrics_map}", |
321 | 331 | ) |
| 332 | + self.assertEqual(hs1_metric_value, "1.0") |
| 333 | + |
322 | 334 | hs2_metric = 'foo{server_name="hs2"}' |
323 | 335 | hs2_metric_value = metrics_map.get(hs2_metric) |
324 | 336 | self.assertIsNotNone( |
325 | 337 | hs2_metric_value, |
326 | | - f"Missing metric {hs2_metric} in cache metrics {metrics_map}", |
| 338 | + f"Missing metric {hs2_metric} in metrics {metrics_map}", |
327 | 339 | ) |
| 340 | + self.assertEqual(hs2_metric_value, "2.0") |
328 | 341 |
|
329 | | - # Sanity check the metric values |
330 | | - self.assertEqual(hs1_metric_value, "1.0") |
| 342 | + def test_later_gauge_hook_exception(self) -> None: |
| 343 | + """ |
| 344 | + Test that LaterGauge metrics are collected across multiple servers even if one |
| 345 | + hook is throwing an exception. |
| 346 | + """ |
| 347 | + |
| 348 | + def raise_exception() -> NoReturn: |
| 349 | + raise Exception("fake error generating data") |
| 350 | + |
| 351 | + # Make the hook for hs1 throw an exception |
| 352 | + self.later_gauge.register_hook( |
| 353 | + homeserver_instance_id="123", hook=raise_exception |
| 354 | + ) |
| 355 | + # Metrics from hs2 still work fine |
| 356 | + self.later_gauge.register_hook( |
| 357 | + homeserver_instance_id="456", hook=lambda: {("hs2",): 2} |
| 358 | + ) |
| 359 | + |
| 360 | + metrics_map = get_latest_metrics() |
| 361 | + |
| 362 | + # Since we encountered an exception while trying to collect metrics from hs1, we |
| 363 | + # don't expect to see it here. |
| 364 | + hs1_metric = 'foo{server_name="hs1"}' |
| 365 | + hs1_metric_value = metrics_map.get(hs1_metric) |
| 366 | + self.assertIsNone( |
| 367 | + hs1_metric_value, |
| 368 | + ( |
| 369 | + "Since we encountered an exception while trying to collect metrics from hs1, " |
| 370 | + f"we don't expect to see it in the metrics_map {metrics_map}" |
| 371 | + ), |
| 372 | + ) |
| 373 | + |
| 374 | + # We should still see metrics from hs2 though |
| 375 | + hs2_metric = 'foo{server_name="hs2"}' |
| 376 | + hs2_metric_value = metrics_map.get(hs2_metric) |
| 377 | + self.assertIsNotNone( |
| 378 | + hs2_metric_value, |
| 379 | + f"Missing metric {hs2_metric} in metrics {metrics_map}", |
| 380 | + ) |
331 | 381 | self.assertEqual(hs2_metric_value, "2.0") |
332 | 382 |
|
333 | 383 |
|
|
0 commit comments