Skip to content

Commit a281776

Browse files
committed
Integrate Exemplars with Python SDK
1 parent 2a55926 commit a281776

11 files changed

Lines changed: 1733 additions & 15 deletions

File tree

docs/examples/basic_meter/view.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@
8282
# parameter to specify the bucket ranges
8383
size_view = View(
8484
requests_size,
85-
HistogramAggregator(config=[20, 40, 60, 80, 100]),
85+
HistogramAggregator(config={"bounds": [20, 40, 60, 80, 100]}),
8686
label_keys=["environment"], # is not used due to ViewConfig.UNGROUPED
8787
config=ViewConfig.UNGROUPED,
8888
)

docs/examples/exemplars/.ipynb_checkpoints/statistical_exemplars-checkpoint.ipynb

Lines changed: 359 additions & 0 deletions
Large diffs are not rendered by default.

docs/examples/exemplars/README.rst

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
OpenTelemetry Exemplars Example
2+
===============================
3+
4+
Exemplars are example measurements for aggregations. While they are simple conceptually, exemplars can estimate any statistic about the input distribution, can provide links to sample traces for high latency requests, and much more.
5+
For more information about exemplars and how they work in OpenTelemetry, see the `spec <https://github.com/open-telemetry/oteps/pull/113>`_
6+
7+
Examples
8+
--------
9+
10+
Installation
11+
12+
.. code-block:: sh
13+
14+
pip install opentelemetry-api
15+
pip install opentelemetry-sdk
16+
pip install matplotlib # may have to install Qt as well
17+
pip install numpy
18+
19+
pip install opentelemetry-exporter-cloud-monitoring # if you want to export exemplars to cloud monitoring
20+
21+
Statistical exemplars
22+
^^^^^^^^^^^^^^^^^^^^^
23+
24+
The opentelemetry SDK provides a way to sample exemplars statistically:
25+
26+
- Exemplars will be picked to represent the input distribution, without unquantifiable bias
27+
- A "sample_count" attribute will be set on each exemplar to quantify how many measurements each exemplar represents
28+
29+
See the `statistical_exemplars.ipynb` notebook in this directory for the example.
30+
31+
Semantic exemplars
32+
^^^^^^^^^^^^^^^^^^
33+
34+
Semantic exemplars are exemplars that have not been sampled statistically,
35+
but instead aim to provide value as individual exemplars.
36+
They will have a trace id/span id attached for the active trace when the exemplar was recorded,
37+
and they may focus on measurements with abnormally high/low values.
38+
39+
'semantic_exemplars.py' shows how to generate exemplars for a histogram aggregation.
40+
Currently only the Google Cloud Monitoring exporter supports uploading these exemplars.
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
# Copyright The OpenTelemetry Authors
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
#
15+
"""
16+
This example shows how to generate "semantic" exemplars for a histogram, and how to export them to Google Cloud Monitoring.
17+
"""
18+
19+
import random
20+
import time
21+
22+
from opentelemetry import metrics
23+
from opentelemetry.sdk.metrics import (
24+
MeterProvider,
25+
ValueRecorder,
26+
)
27+
from opentelemetry.sdk.metrics.export import ConsoleMetricsExporter
28+
from opentelemetry.sdk.metrics.export.aggregate import (
29+
HistogramAggregator,
30+
)
31+
from opentelemetry.sdk.metrics.view import View, ViewConfig
32+
33+
# Set up OpenTelemetry metrics
34+
metrics.set_meter_provider(MeterProvider(stateful=False))
35+
meter = metrics.get_meter(__name__)
36+
37+
# Use the Google Cloud Monitoring Metrics Exporter since its the only one that currently supports exemplars
38+
metrics.get_meter_provider().start_pipeline(meter, ConsoleMetricsExporter(), 10)
39+
40+
# Create our duration metric
41+
request_duration = meter.create_metric(
42+
name="request_duration",
43+
description="duration (ms) of incoming requests",
44+
unit="ms",
45+
value_type=int,
46+
metric_type=ValueRecorder,
47+
)
48+
49+
# Add a Histogram view to our duration metric, and make it generate 1 exemplars per bucket
50+
duration_view = View(
51+
request_duration,
52+
# Latency in buckets:
53+
# [>=0ms, >=25ms, >=50ms, >=75ms, >=100ms, >=200ms, >=400ms, >=600ms, >=800ms, >=1s, >=2s, >=4s, >=6s]
54+
# We want to generate 1 exemplar per bucket, where each exemplar has a linked trace that was recorded.
55+
# So we need to set num_exemplars to 1 and not specify statistical_exemplars (defaults to false)
56+
HistogramAggregator(config={"bounds": [0, 25, 50, 75, 100, 200, 400, 600, 800, 1000, 2000, 4000, 6000],
57+
"num_exemplars": 1}),
58+
label_keys=["environment"],
59+
config=ViewConfig.LABEL_KEYS,
60+
)
61+
62+
meter.register_view(duration_view)
63+
64+
for i in range(100):
65+
# Generate some random data for the histogram with a dropped label "customer_id"
66+
request_duration.record(random.randint(1, 8000), {"environment": "staging", "customer_id": random.randint(1, 100)})
67+
time.sleep(1)

docs/examples/exemplars/statistical_exemplars.ipynb

Lines changed: 339 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
import numpy as np
2+
import matplotlib.pyplot as plt
3+
import random
4+
5+
from collections import defaultdict
6+
7+
from opentelemetry import metrics
8+
from opentelemetry.sdk.metrics import Counter, MeterProvider
9+
from opentelemetry.sdk.metrics.export.aggregate import SumAggregator
10+
from opentelemetry.sdk.metrics.export.controller import PushController
11+
from opentelemetry.sdk.metrics.export.in_memory_metrics_exporter import InMemoryMetricsExporter
12+
from opentelemetry.sdk.metrics.view import View, ViewConfig
13+
14+
## set up opentelemetry
15+
16+
# Sets the global MeterProvider instance
17+
metrics.set_meter_provider(MeterProvider())
18+
19+
meter = metrics.get_meter(__name__)
20+
21+
# Export to a python list so we can do stats with the data
22+
exporter = InMemoryMetricsExporter()
23+
24+
# instead of waiting for the controller to tick over time, we will just tick it ourselves
25+
controller = PushController(meter, exporter, 500)
26+
27+
# Create the metric that we will use
28+
bytes_counter = meter.create_metric(
29+
name="bytes_counter",
30+
description="Number of bytes received by service",
31+
unit="By",
32+
value_type=int,
33+
metric_type=Counter,
34+
)
35+
36+
# Every time interval we will collect 100 exemplars statistically (selected without bias)
37+
aggregator_config = {"num_exemplars": 100, "statistical_exemplars": True}
38+
39+
# Assign a Sum aggregator to `bytes_counter` that collects exemplars
40+
counter_view = View(
41+
bytes_counter,
42+
SumAggregator(config=aggregator_config),
43+
label_keys=["environment"],
44+
config=ViewConfig.LABEL_KEYS,
45+
)
46+
47+
meter.register_view(counter_view)
48+
49+
## generate the random metric data
50+
51+
def unknown_customer_calls():
52+
"""Generate customer call data to our application"""
53+
54+
# set a random seed for consistency of data for example purposes
55+
np.random.seed(1)
56+
# Make exemplar selection consistent for example purposes
57+
random.seed(1)
58+
59+
# customer 123 is a big user, and made 1000 requests in this timeframe
60+
requests = np.random.normal(1000, 250, 1000) # 1000 requests with average 1000 bytes, covariance 100
61+
62+
for request in requests:
63+
bytes_counter.add(int(request), {"environment": "production", "method": "REST", "customer_id": 123})
64+
65+
# customer 247 is another big user, making fewer, but bigger requests
66+
requests = np.random.normal(5000, 1250, 200) # 200 requests with average size of 5k bytes
67+
68+
for request in requests:
69+
bytes_counter.add(int(request), {"environment": "production", "method": "REST", "customer_id": 247})
70+
71+
# There are many other smaller customers
72+
for customer_id in range(250):
73+
requests = np.random.normal(1000, 250, np.random.randint(1, 10))
74+
method = "REST" if np.random.randint(2) else "gRPC"
75+
for request in requests:
76+
bytes_counter.add(int(request), {"environment": "production", "method": method, "customer_id": customer_id})
77+
78+
unknown_customer_calls()
79+
80+
# Tick the controller so it sends metrics to the exporter
81+
controller.tick()
82+
83+
# collect metrics from our exporter
84+
metric_data = exporter.get_exported_metrics()
85+
86+
# get the exemplars from the bytes_in counter aggregator
87+
aggregator = metric_data[0].aggregator
88+
exemplars = aggregator.checkpoint_exemplars
89+
90+
# Sum up the total bytes in per customer from all of the exemplars collected
91+
customer_bytes_map = defaultdict(int)
92+
for exemplar in exemplars:
93+
customer_bytes_map[exemplar.dropped_labels] += exemplar.value
94+
95+
96+
customer_bytes_list = sorted(list(customer_bytes_map.items()), key=lambda t: t[1], reverse=True)
97+
98+
# Save our top 5 customers and sum all of the rest into "Others".
99+
top_5_customers = [("Customer {}".format(dict(val[0])["customer_id"]), val[1]) for val in customer_bytes_list[:5]] + [("Other Customers", sum([val[1] for val in customer_bytes_list[5:]]))]
100+
101+
# unzip the data into X (sizes of each customer's contribution) and labels
102+
labels, X = zip(*top_5_customers)
103+
104+
# create the chart with matplotlib and show it
105+
plt.pie(X, labels=labels)
106+
plt.show()
107+
108+
# Estimate how many bytes customer 123 sent
109+
customer_123_bytes = customer_bytes_map[(("customer_id", 123), ("method", "REST"))]
110+
111+
# Since the exemplars were randomly sampled, all sample_counts will be the same
112+
sample_count = exemplars[0].sample_count
113+
print("sample count", sample_count, "custmer", customer_123_bytes)
114+
full_customer_123_bytes = sample_count * customer_123_bytes
115+
116+
# With seed == 1 we get 1008612 - quite close to the statistical mean of 1000000! (more exemplars would make this estimation even more accurate)
117+
print("Customer 123 sent about {} bytes this interval".format(int(full_customer_123_bytes)))
118+
119+
# Determine the top 25 customers by how many bytes they sent in exemplars
120+
top_25_customers = customer_bytes_list[:25]
121+
122+
# out of those 25 customers, determine how many used grpc, and come up with a ratio
123+
percent_grpc = len(list(filter(lambda customer_value: customer_value[0][1][1] == "gRPC", top_25_customers))) / len(top_25_customers)
124+
125+
print("~{}% of the top 25 customers (by bytes in) used gRPC this interval".format(int(percent_grpc*100)))
126+
127+
# Determine the 50th, 90th, and 99th percentile of byte size sent in
128+
quantiles = np.quantile([exemplar.value for exemplar in exemplars], [0.5, 0.9, 0.99])
129+
print("50th Percentile Bytes In:", int(quantiles[0]))
130+
print("90th Percentile Bytes In:", int(quantiles[1]))
131+
print("99th Percentile Bytes In:", int(quantiles[2]))

opentelemetry-sdk/src/opentelemetry/sdk/metrics/export/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,11 +77,12 @@ def export(
7777
) -> "MetricsExportResult":
7878
for record in metric_records:
7979
print(
80-
'{}(data="{}", labels="{}", value={})'.format(
80+
'{}(data="{}", labels="{}", value={}, exemplars={})'.format(
8181
type(self).__name__,
8282
record.instrument,
8383
record.labels,
8484
record.aggregator.checkpoint,
85+
record.aggregator.checkpoint_exemplars
8586
)
8687
)
8788
return MetricsExportResult.SUCCESS

0 commit comments

Comments
 (0)