Skip to content
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
gen_ai_attributes as GenAI,
)
from opentelemetry.semconv.attributes import (
error_attributes as ErrorAttributes,
error_attributes,
)
from opentelemetry.trace import Span, SpanKind, Tracer, set_span_in_context
from opentelemetry.trace.status import Status, StatusCode
Expand Down Expand Up @@ -112,6 +112,6 @@ def handle_error(self, error: BaseException, run_id: UUID):
return
span.set_status(Status(StatusCode.ERROR, str(error)))
span.set_attribute(
ErrorAttributes.ERROR_TYPE, type(error).__qualname__
error_attributes.ERROR_TYPE, type(error).__qualname__
)
self.end_span(run_id)
4 changes: 3 additions & 1 deletion util/opentelemetry-util-genai/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## Unreleased

- Add support for `server.address`, `server.port` on all signals and additional metric-only attributes
  ([#4069](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4069))
- Log error when `fsspec` fails to be imported instead of silently failing ([#4037](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4037)).
- Minor change to check LRU cache in Completion Hook before acquiring semaphore/thread ([#3907](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3907)).
- Add environment variable for genai upload hook queue size
  ([#3943](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3943))
- Add more Semconv attributes to LLMInvocation spans.
  ([#3862](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3862))
- Limit the upload hook thread pool to 64 workers
  ([#3944](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3944))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,16 @@
from __future__ import annotations

import timeit
from numbers import Number
from typing import Dict, Optional

from opentelemetry.metrics import Histogram, Meter
from opentelemetry.semconv._incubating.attributes import (
gen_ai_attributes as GenAI,
)
from opentelemetry.semconv.attributes import (
error_attributes,
server_attributes,
)
from opentelemetry.trace import Span, set_span_in_context
from opentelemetry.util.genai.instruments import (
create_duration_histogram,
Expand All @@ -34,6 +37,9 @@ def record(
error_type: Optional[str] = None,
) -> None:
"""Record duration and token metrics for an invocation if possible."""

# pylint: disable=too-many-branches

if span is None:
return

Expand Down Expand Up @@ -64,6 +70,14 @@ def record(
attributes[GenAI.GEN_AI_RESPONSE_MODEL] = (
invocation.response_model_name
)
if invocation.server_address:
attributes[server_attributes.SERVER_ADDRESS] = (
invocation.server_address
)
if invocation.server_port is not None:
attributes[server_attributes.SERVER_PORT] = invocation.server_port
if invocation.metric_attributes:
attributes.update(invocation.metric_attributes)

# Calculate duration from span timing or invocation monotonic start
duration_seconds: Optional[float] = None
Expand All @@ -75,13 +89,9 @@ def record(

span_context = set_span_in_context(span)
if error_type:
attributes["error.type"] = error_type
attributes[error_attributes.ERROR_TYPE] = error_type

if (
duration_seconds is not None
and isinstance(duration_seconds, Number)
Comment thread
lmolkova marked this conversation as resolved.
and duration_seconds >= 0
):
if duration_seconds is not None:
self._duration_histogram.record(
duration_seconds,
attributes=attributes,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@
gen_ai_attributes as GenAI,
)
from opentelemetry.semconv.attributes import (
error_attributes as ErrorAttributes,
error_attributes,
server_attributes,
)
from opentelemetry.trace import (
Span,
Expand Down Expand Up @@ -62,6 +63,15 @@ def _apply_common_span_attributes(
# TODO: clean provider name to match GenAiProviderNameValues?
span.set_attribute(GenAI.GEN_AI_PROVIDER_NAME, invocation.provider)

if invocation.server_address:
span.set_attribute(
server_attributes.SERVER_ADDRESS, invocation.server_address
)
if invocation.server_port:
span.set_attribute(
server_attributes.SERVER_PORT, invocation.server_port
)

_apply_response_attributes(span, invocation)


def _apply_error_attributes(span: Span, error: Error) -> None:
    """Set the error status and `error.type` attribute shared by error() paths."""
    span.set_status(Status(StatusCode.ERROR, error.message))
    if not span.is_recording():
        return
    span.set_attribute(error_attributes.ERROR_TYPE, error.type.__qualname__)


def _apply_request_attributes(span: Span, invocation: LLMInvocation) -> None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,13 +116,26 @@ class LLMInvocation:
input_tokens: int | None = None
output_tokens: int | None = None
attributes: dict[str, Any] = field(default_factory=_new_str_any_dict)
"""
Comment thread
lmolkova marked this conversation as resolved.
Additional attributes to set on spans and/or events. These attributes
will not be set on metrics.
"""
metric_attributes: dict[str, Any] = field(
default_factory=_new_str_any_dict
)
"""
Additional attributes to set on metrics. Must be of a low cardinality.
These attributes will not be set on spans or events.
"""
temperature: float | None = None
top_p: float | None = None
frequency_penalty: float | None = None
presence_penalty: float | None = None
max_tokens: int | None = None
stop_sequences: list[str] | None = None
seed: int | None = None
server_address: str | None = None
server_port: int | None = None
# Monotonic start time in seconds (from timeit.default_timer) used
# for duration calculations to avoid mixing clock sources. This is
# populated by the TelemetryHandler when starting an invocation.
Expand Down
37 changes: 37 additions & 0 deletions util/opentelemetry-util-genai/tests/test_handler_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,43 @@ def test_stop_llm_records_duration_and_tokens(self) -> None:
places=3,
)

def test_stop_llm_records_duration_and_tokens_with_additional_attributes(
    self,
) -> None:
    """Metric data points carry server.address/server.port and any
    metric-only attributes, while span-only attributes are excluded."""
    handler = TelemetryHandler(
        tracer_provider=self.tracer_provider,
        meter_provider=self.meter_provider,
    )

    invocation = LLMInvocation(request_model="model", provider="prov")
    invocation.input_tokens = 5
    invocation.output_tokens = 7
    invocation.server_address = "custom.server.com"
    invocation.server_port = 42
    handler.start_llm(invocation)
    # Set after start so only the metric-recording path observes these.
    invocation.metric_attributes = {
        "custom.attribute": "custom_value",
    }
    invocation.attributes = {"should not be on metrics": "value"}
    handler.stop_llm(invocation)

    harvested = self._harvest_metrics()
    for metric_name in (
        "gen_ai.client.operation.duration",
        "gen_ai.client.token.usage",
    ):
        self.assertIn(metric_name, harvested)
    all_points = (
        harvested["gen_ai.client.operation.duration"]
        + harvested["gen_ai.client.token.usage"]
    )

    for data_point in all_points:
        attrs = data_point.attributes
        self.assertEqual(attrs["server.address"], "custom.server.com")
        self.assertEqual(attrs["server.port"], 42)
        self.assertEqual(attrs["custom.attribute"], "custom_value")
        # Span/event-only attributes must never leak onto metrics.
        self.assertIsNone(attrs.get("should not be on metrics"))

def test_fail_llm_records_error_and_available_tokens(self) -> None:
handler = TelemetryHandler(
tracer_provider=self.tracer_provider,
Expand Down
33 changes: 31 additions & 2 deletions util/opentelemetry-util-genai/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@
gen_ai_attributes as GenAI,
)
from opentelemetry.semconv.attributes import (
error_attributes as ErrorAttributes,
error_attributes,
server_attributes,
)
from opentelemetry.semconv.schemas import Schemas
from opentelemetry.trace.status import StatusCode
Expand Down Expand Up @@ -107,6 +108,9 @@ def _assert_span_attributes(
) -> None:
for key, value in expected_values.items():
assert span_attrs.get(key) == value
assert len(span_attrs) == len(expected_values), (
f"Actual {span_attrs} are different than expected {expected_values}"
)


def _get_messages_from_attr(
Expand Down Expand Up @@ -215,11 +219,14 @@ def test_llm_start_and_stop_creates_span(self): # pylint: disable=no-self-use
"response_id": "response-id",
"input_tokens": 321,
"output_tokens": 654,
"server_address": "custom.server.com",
"server_port": 42,
}.items():
setattr(invocation, attr, value)
assert invocation.span is not None
invocation.output_messages = [chat_generation]
invocation.attributes.update({"extra": "info"})
invocation.metric_attributes = {"should not be on span": "value"}

span = _get_single_span(self.span_exporter)
self.assertEqual(span.name, "chat test-model")
Expand All @@ -231,7 +238,10 @@ def test_llm_start_and_stop_creates_span(self): # pylint: disable=no-self-use
span_attrs,
{
GenAI.GEN_AI_OPERATION_NAME: "chat",
GenAI.GEN_AI_REQUEST_MODEL: "test-model",
GenAI.GEN_AI_PROVIDER_NAME: "test-provider",
GenAI.GEN_AI_INPUT_MESSAGES: AnyNonNone(),
GenAI.GEN_AI_OUTPUT_MESSAGES: AnyNonNone(),
GenAI.GEN_AI_REQUEST_TEMPERATURE: 0.5,
GenAI.GEN_AI_REQUEST_TOP_P: 0.9,
GenAI.GEN_AI_REQUEST_STOP_SEQUENCES: ("stop",),
Expand All @@ -240,6 +250,8 @@ def test_llm_start_and_stop_creates_span(self): # pylint: disable=no-self-use
GenAI.GEN_AI_RESPONSE_ID: "response-id",
GenAI.GEN_AI_USAGE_INPUT_TOKENS: 321,
GenAI.GEN_AI_USAGE_OUTPUT_TOKENS: 654,
server_attributes.SERVER_ADDRESS: "custom.server.com",
server_attributes.SERVER_PORT: 42,
"extra": "info",
"custom_attr": "value",
},
Expand Down Expand Up @@ -286,6 +298,12 @@ def test_llm_manual_start_and_stop_creates_span(self):
_assert_span_attributes(
attrs,
{
GenAI.GEN_AI_OPERATION_NAME: "chat",
GenAI.GEN_AI_REQUEST_MODEL: "manual-model",
GenAI.GEN_AI_PROVIDER_NAME: "test-provider",
GenAI.GEN_AI_INPUT_MESSAGES: AnyNonNone(),
GenAI.GEN_AI_OUTPUT_MESSAGES: AnyNonNone(),
GenAI.GEN_AI_RESPONSE_FINISH_REASONS: ("stop",),
"manual": True,
"extra_manual": "yes",
},
Expand All @@ -312,6 +330,9 @@ def test_llm_span_finish_reasons_without_output_messages(self):
_assert_span_attributes(
attrs,
{
GenAI.GEN_AI_OPERATION_NAME: "chat",
GenAI.GEN_AI_REQUEST_MODEL: "model-without-output",
GenAI.GEN_AI_PROVIDER_NAME: "test-provider",
GenAI.GEN_AI_RESPONSE_FINISH_REASONS: ("length",),
GenAI.GEN_AI_RESPONSE_MODEL: "alt-model",
GenAI.GEN_AI_RESPONSE_ID: "resp-001",
Expand Down Expand Up @@ -457,13 +478,21 @@ class BoomError(RuntimeError):
_assert_span_attributes(
span_attrs,
{
ErrorAttributes.ERROR_TYPE: BoomError.__qualname__,
GenAI.GEN_AI_OPERATION_NAME: "chat",
GenAI.GEN_AI_REQUEST_MODEL: "test-model",
GenAI.GEN_AI_PROVIDER_NAME: "test-provider",
GenAI.GEN_AI_REQUEST_MAX_TOKENS: 128,
GenAI.GEN_AI_REQUEST_SEED: 123,
GenAI.GEN_AI_RESPONSE_FINISH_REASONS: ("error",),
GenAI.GEN_AI_RESPONSE_MODEL: "error-model",
GenAI.GEN_AI_RESPONSE_ID: "error-response",
GenAI.GEN_AI_USAGE_INPUT_TOKENS: 11,
GenAI.GEN_AI_USAGE_OUTPUT_TOKENS: 22,
error_attributes.ERROR_TYPE: BoomError.__qualname__,
},
)


class AnyNonNone:
    """Equality matcher: compares equal to any value except None."""

    def __eq__(self, other):
        if other is None:
            return False
        return True