Skip to content

Commit cf3108c

Browse files
ci-operatorclaude
andcommitted
feat: add distributed tracing for webhook handling and PipelineRun timing
Emit a PipelinesAsCode:ProcessEvent span covering the full webhook event lifecycle. Emit waitDuration and executeDuration timing spans for completed PipelineRuns. Propagate trace context onto created PipelineRuns via the tekton.dev/pipelinerunSpanContext annotation. Configure the Knative observability framework to read tracing config from the pipelines-as-code-config-observability ConfigMap. Add tracing configuration guide and config examples. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 56a3a61 commit cf3108c

File tree

13 files changed

+759
-8
lines changed

13 files changed

+759
-8
lines changed

cmd/pipelines-as-code-controller/main.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,9 @@ func main() {
4040
loggerConfigurator := evadapter.NewLoggerConfiguratorFromConfigMap(PACControllerLogKey, loggerConfiguratorOpt)
4141
copts := []evadapter.ConfiguratorOption{
4242
evadapter.WithLoggerConfigurator(loggerConfigurator),
43-
evadapter.WithObservabilityConfigurator(evadapter.NewObservabilityConfiguratorFromConfigMap()),
43+
evadapter.WithObservabilityConfigurator(evadapter.NewObservabilityConfiguratorFromConfigMap(
44+
evadapter.WithObservabilityConfiguratorConfigMapName("pipelines-as-code-config-observability"),
45+
)),
4446
evadapter.WithCloudEventsStatusReporterConfigurator(evadapter.NewCloudEventsReporterConfiguratorFromConfigMap()),
4547
}
4648
// put logger configurator to ctx

config/305-config-observability.yaml

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,4 +50,18 @@ data:
5050
5151
# metrics-export-interval specifies how often metrics are exported.
5252
# Only applicable for grpc and http/protobuf protocols.
53-
# metrics-export-interval: "30s"
53+
# metrics-export-interval: "30s"
54+
55+
# tracing-protocol specifies the trace export protocol.
56+
# Supported values: "grpc", "http/protobuf", "none".
57+
# Default is "none" (tracing disabled).
58+
# tracing-protocol: "none"
59+
60+
# tracing-endpoint specifies the OTLP collector endpoint.
61+
# Required when tracing-protocol is "grpc" or "http/protobuf".
62+
# The OTEL_EXPORTER_OTLP_ENDPOINT env var takes precedence if set.
63+
# tracing-endpoint: "http://otel-collector.observability.svc.cluster.local:4317"
64+
65+
# tracing-sampling-rate controls the fraction of traces sampled.
66+
# 0.0 = none, 1.0 = all. Default is 0 (none).
67+
# tracing-sampling-rate: "1.0"
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
---
2+
title: Distributed Tracing
3+
weight: 5
4+
---
5+
6+
This page describes how to enable OpenTelemetry distributed tracing for Pipelines-as-Code. When enabled, PaC emits trace spans for webhook event processing and PipelineRun lifecycle timing.
7+
8+
## Enabling tracing
9+
10+
The ConfigMap `pipelines-as-code-config-observability` controls tracing configuration. See [config/305-config-observability.yaml](https://github.com/tektoncd/pipelines-as-code/blob/main/config/305-config-observability.yaml) for the full example.
11+
12+
It contains the following tracing fields:
13+
14+
* `tracing-protocol`: Export protocol. Supported values: `grpc`, `http/protobuf`, `none`. Default is `none` (tracing disabled).
15+
* `tracing-endpoint`: OTLP collector endpoint. Required when `tracing-protocol` is `grpc` or `http/protobuf`. The `OTEL_EXPORTER_OTLP_ENDPOINT` environment variable takes precedence if set.
16+
* `tracing-sampling-rate`: Fraction of traces to sample. `0.0` = none, `1.0` = all. Default is `0`.
17+
18+
### Example
19+
20+
```yaml
21+
apiVersion: v1
22+
kind: ConfigMap
23+
metadata:
24+
name: pipelines-as-code-config-observability
25+
namespace: pipelines-as-code
26+
data:
27+
tracing-protocol: grpc
28+
tracing-endpoint: "http://otel-collector.observability.svc.cluster.local:4317"
29+
tracing-sampling-rate: "1.0"
30+
```
31+
32+
Changes to the ConfigMap are picked up automatically without restarting the controller. Set `tracing-protocol` to `none` or remove the tracing keys to disable tracing.
33+
34+
## Emitted spans
35+
36+
The controller emits a `PipelinesAsCode:ProcessEvent` span covering the full lifecycle of each webhook event, from receipt through PipelineRun creation. The watcher emits `waitDuration` and `executeDuration` spans for completed PipelineRuns, using the PipelineRun's actual timestamps for accurate wall-clock timing.
37+
38+
## Trace context propagation
39+
40+
When Pipelines-as-Code creates a PipelineRun, it sets the `tekton.dev/pipelinerunSpanContext` annotation with a JSON-encoded OTel TextMapCarrier containing the W3C `traceparent`. PaC tracing works independently — you get PaC spans regardless of whether Tekton Pipelines has tracing enabled.
41+
42+
If Tekton Pipelines is also configured with tracing pointing at the same collector, its reconciler spans appear as children of the PaC span, providing a single end-to-end trace from webhook receipt through task execution. See the [Tekton Pipelines tracing documentation](https://github.com/tektoncd/pipeline/blob/main/docs/developers/tracing.md) for Tekton's independent tracing setup.

go.mod

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,9 @@ require (
3030
gitlab.com/gitlab-org/api/client-go v1.46.0
3131
go.opentelemetry.io/otel v1.42.0
3232
go.opentelemetry.io/otel/metric v1.42.0
33+
go.opentelemetry.io/otel/sdk v1.42.0
3334
go.opentelemetry.io/otel/sdk/metric v1.42.0
35+
go.opentelemetry.io/otel/trace v1.42.0
3436
go.uber.org/zap v1.27.1
3537
golang.org/x/exp v0.0.0-20260312153236-7ab1446f8b90
3638
golang.org/x/oauth2 v0.36.0
@@ -91,8 +93,6 @@ require (
9193
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.42.0 // indirect
9294
go.opentelemetry.io/otel/exporters/prometheus v0.64.0 // indirect
9395
go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.42.0 // indirect
94-
go.opentelemetry.io/otel/sdk v1.42.0 // indirect
95-
go.opentelemetry.io/otel/trace v1.42.0 // indirect
9696
go.opentelemetry.io/proto/otlp v1.10.0 // indirect
9797
go.uber.org/atomic v1.11.0 // indirect
9898
go.yaml.in/yaml/v2 v2.4.4 // indirect

pkg/adapter/adapter.go

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@ import (
2323
"github.com/openshift-pipelines/pipelines-as-code/pkg/provider/gitea"
2424
"github.com/openshift-pipelines/pipelines-as-code/pkg/provider/github"
2525
"github.com/openshift-pipelines/pipelines-as-code/pkg/provider/gitlab"
26+
"github.com/openshift-pipelines/pipelines-as-code/pkg/tracing"
27+
"go.opentelemetry.io/otel"
28+
"go.opentelemetry.io/otel/propagation"
29+
"go.opentelemetry.io/otel/trace"
2630
"go.uber.org/zap"
2731
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2832
"knative.dev/eventing/pkg/adapter/v2"
@@ -192,6 +196,26 @@ func (l listener) handleEvent(ctx context.Context) http.HandlerFunc {
192196
}
193197
gitProvider.SetPacInfo(&pacInfo)
194198

199+
// Extract inbound trace context from request headers for distributed tracing
200+
tracedCtx := otel.GetTextMapPropagator().Extract(ctx, propagation.HeaderCarrier(request.Header))
201+
202+
// Start a span for webhook handling
203+
tracer := otel.Tracer(tracing.TracerName)
204+
tracedCtx, span := tracer.Start(tracedCtx, "PipelinesAsCode:ProcessEvent",
205+
trace.WithSpanKind(trace.SpanKindServer),
206+
)
207+
208+
span.SetAttributes(
209+
tracing.VCSEventTypeKey.String(l.event.EventType),
210+
tracing.VCSProviderKey.String(gitProvider.GetConfig().Name),
211+
)
212+
if l.event.URL != "" {
213+
span.SetAttributes(tracing.VCSRepositoryKey.String(l.event.URL))
214+
}
215+
if l.event.SHA != "" {
216+
span.SetAttributes(tracing.VCSRevisionKey.String(l.event.SHA))
217+
}
218+
195219
s := sinker{
196220
run: l.run,
197221
vcx: gitProvider,
@@ -207,8 +231,10 @@ func (l listener) handleEvent(ctx context.Context) http.HandlerFunc {
207231
localRequest := request.Clone(request.Context())
208232

209233
go func() {
210-
err := s.processEvent(ctx, localRequest)
234+
defer span.End()
235+
err := s.processEvent(tracedCtx, localRequest)
211236
if err != nil {
237+
span.RecordError(err)
212238
logger.Errorf("an error occurred: %v", err)
213239
}
214240
}()

pkg/apis/pipelinesascode/keys/keys.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,9 @@ const (
6666
GithubApplicationID = "github-application-id"
6767
GithubPrivateKey = "github-private-key"
6868
ResultsRecordSummary = "results.tekton.dev/recordSummaryAnnotations"
69+
70+
// SpanContextAnnotation is the annotation key for propagating span context to Tekton for distributed tracing
71+
SpanContextAnnotation = "tekton.dev/pipelinerunSpanContext"
6972
)
7073

7174
var ParamsRe = regexp.MustCompile(`{{([^}]{2,})}}`)

pkg/kubeinteraction/labels.go

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package kubeinteraction
22

33
import (
4+
"context"
5+
"encoding/json"
46
"fmt"
57
"strconv"
68

@@ -12,6 +14,9 @@ import (
1214
"github.com/openshift-pipelines/pipelines-as-code/pkg/params/info"
1315
"github.com/openshift-pipelines/pipelines-as-code/pkg/params/versiondata"
1416
tektonv1 "github.com/tektoncd/pipeline/pkg/apis/pipeline/v1"
17+
"go.opentelemetry.io/otel"
18+
"go.opentelemetry.io/otel/propagation"
19+
"knative.dev/pkg/logging"
1520
)
1621

1722
const (
@@ -21,11 +26,15 @@ const (
2126
StateFailed = "failed"
2227
)
2328

24-
func AddLabelsAndAnnotations(event *info.Event, pipelineRun *tektonv1.PipelineRun, repo *apipac.Repository, providerConfig *info.ProviderConfig, paramsRun *params.Run) error {
29+
func AddLabelsAndAnnotations(ctx context.Context, event *info.Event, pipelineRun *tektonv1.PipelineRun, repo *apipac.Repository, providerConfig *info.ProviderConfig, paramsRun *params.Run) error {
2530
if event == nil {
2631
return fmt.Errorf("event should not be nil")
2732
}
2833
paramsinfo := paramsRun.Info
34+
35+
// Inject span context for distributed tracing
36+
carrier := propagation.MapCarrier{}
37+
otel.GetTextMapPropagator().Inject(ctx, carrier)
2938
// Add labels on the soon-to-be created pipelinerun so UI/CLI can easily
3039
// query them.
3140
labels := map[string]string{
@@ -59,6 +68,16 @@ func AddLabelsAndAnnotations(event *info.Event, pipelineRun *tektonv1.PipelineRu
5968
paramsinfo.Controller.Name, paramsinfo.Controller.Configmap, paramsinfo.Controller.Secret, paramsinfo.Controller.GlobalRepository),
6069
}
6170

71+
// Add span context for distributed tracing if available
72+
if len(carrier) > 0 {
73+
if jsonBytes, err := json.Marshal(carrier); err == nil {
74+
if existing := pipelineRun.GetAnnotations()[keys.SpanContextAnnotation]; existing != "" {
75+
logging.FromContext(ctx).Warnf("overwriting pre-existing %s annotation on PipelineRun template; honoring initiating event trace context", keys.SpanContextAnnotation)
76+
}
77+
annotations[keys.SpanContextAnnotation] = string(jsonBytes)
78+
}
79+
}
80+
6281
if event.PullRequestNumber != 0 {
6382
labels[keys.PullRequest] = strconv.Itoa(event.PullRequestNumber)
6483
annotations[keys.PullRequest] = strconv.Itoa(event.PullRequestNumber)

pkg/kubeinteraction/labels_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package kubeinteraction
22

33
import (
4+
"context"
45
"fmt"
56
"testing"
67

@@ -68,7 +69,7 @@ func TestAddLabelsAndAnnotations(t *testing.T) {
6869
Controller: tt.args.controllerInfo,
6970
},
7071
}
71-
err := AddLabelsAndAnnotations(tt.args.event, tt.args.pipelineRun, tt.args.repo, &info.ProviderConfig{}, paramsRun)
72+
err := AddLabelsAndAnnotations(context.Background(), tt.args.event, tt.args.pipelineRun, tt.args.repo, &info.ProviderConfig{}, paramsRun)
7273
assert.NilError(t, err)
7374
assert.Equal(t, tt.args.pipelineRun.Labels[keys.URLOrg], tt.args.event.Organization, "'%s' != %s",
7475
tt.args.pipelineRun.Labels[keys.URLOrg], tt.args.event.Organization)

pkg/pipelineascode/pipelineascode.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@ func (p *PacRun) startPR(ctx context.Context, match matcher.Match) (*tektonv1.Pi
250250
}
251251

252252
// Add labels and annotations to pipelinerun
253-
err := kubeinteraction.AddLabelsAndAnnotations(p.event, match.PipelineRun, match.Repo, p.vcx.GetConfig(), p.run)
253+
err := kubeinteraction.AddLabelsAndAnnotations(ctx, p.event, match.PipelineRun, match.Repo, p.vcx.GetConfig(), p.run)
254254
if err != nil {
255255
p.logger.Errorf("Error adding labels/annotations to PipelineRun '%s' in namespace '%s': %v", match.PipelineRun.GetName(), match.Repo.GetNamespace(), err)
256256
} else {

pkg/reconciler/emit_traces.go

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
package reconciler
2+
3+
import (
4+
"context"
5+
"encoding/json"
6+
7+
"github.com/openshift-pipelines/pipelines-as-code/pkg/apis/pipelinesascode/keys"
8+
"github.com/openshift-pipelines/pipelines-as-code/pkg/tracing"
9+
tektonv1 "github.com/tektoncd/pipeline/pkg/apis/pipeline/v1"
10+
"go.opentelemetry.io/otel"
11+
"go.opentelemetry.io/otel/attribute"
12+
"go.opentelemetry.io/otel/propagation"
13+
semconv "go.opentelemetry.io/otel/semconv/v1.37.0"
14+
"go.opentelemetry.io/otel/trace"
15+
corev1 "k8s.io/api/core/v1"
16+
"knative.dev/pkg/apis"
17+
)
18+
19+
// Label keys and fixed attribute values used when building the timing-span
// attributes in this file.
const (
20+
// applicationLabel is the PipelineRun label key whose value is reported as
// the delivery application attribute.
applicationLabel = "appstudio.openshift.io/application"
21+
// componentLabel is the PipelineRun label key whose value, when non-empty,
// is reported as the delivery component attribute.
componentLabel = "appstudio.openshift.io/component"
22+
// stageBuild is the value reported for the delivery stage attribute.
stageBuild = "build"
23+
)
24+
25+
26+
// extractSpanContext extracts the trace context from the pipelinerunSpanContext annotation.
27+
func extractSpanContext(pr *tektonv1.PipelineRun) (context.Context, bool) {
28+
raw, ok := pr.GetAnnotations()[keys.SpanContextAnnotation]
29+
if !ok || raw == "" {
30+
return nil, false
31+
}
32+
var carrierMap map[string]string
33+
if err := json.Unmarshal([]byte(raw), &carrierMap); err != nil {
34+
return nil, false
35+
}
36+
carrier := propagation.MapCarrier(carrierMap)
37+
ctx := otel.GetTextMapPropagator().Extract(context.Background(), carrier)
38+
sc := trace.SpanContextFromContext(ctx)
39+
if !sc.IsValid() {
40+
return nil, false
41+
}
42+
return ctx, true
43+
}
44+
45+
// emitTimingSpans emits wait_duration and execute_duration spans for a completed build PipelineRun.
46+
func emitTimingSpans(pr *tektonv1.PipelineRun) {
47+
parentCtx, ok := extractSpanContext(pr)
48+
if !ok {
49+
return
50+
}
51+
52+
tracer := otel.Tracer(tracing.TracerName)
53+
commonAttrs := buildCommonAttributes(pr)
54+
55+
// Emit waitDuration: creationTimestamp -> status.startTime
56+
if pr.Status.StartTime != nil {
57+
_, waitSpan := tracer.Start(parentCtx, "waitDuration",
58+
trace.WithTimestamp(pr.CreationTimestamp.Time),
59+
trace.WithAttributes(commonAttrs...),
60+
)
61+
waitSpan.End(trace.WithTimestamp(pr.Status.StartTime.Time))
62+
}
63+
64+
// Emit executeDuration: status.startTime -> status.completionTime
65+
if pr.Status.StartTime != nil && pr.Status.CompletionTime != nil {
66+
execAttrs := append(append([]attribute.KeyValue{}, commonAttrs...), buildExecuteAttributes(pr)...)
67+
_, execSpan := tracer.Start(parentCtx, "executeDuration",
68+
trace.WithTimestamp(pr.Status.StartTime.Time),
69+
trace.WithAttributes(execAttrs...),
70+
)
71+
execSpan.End(trace.WithTimestamp(pr.Status.CompletionTime.Time))
72+
}
73+
}
74+
75+
// buildCommonAttributes returns span attributes common to both timing spans.
76+
func buildCommonAttributes(pr *tektonv1.PipelineRun) []attribute.KeyValue {
77+
attrs := []attribute.KeyValue{
78+
semconv.K8SNamespaceName(pr.GetNamespace()),
79+
tracing.TektonPipelineRunNameKey.String(pr.GetName()),
80+
tracing.TektonPipelineRunUIDKey.String(string(pr.GetUID())),
81+
tracing.DeliveryStageKey.String(stageBuild),
82+
tracing.DeliveryApplicationKey.String(pr.GetLabels()[applicationLabel]),
83+
}
84+
if component := pr.GetLabels()[componentLabel]; component != "" {
85+
attrs = append(attrs, tracing.DeliveryComponentKey.String(component))
86+
}
87+
return attrs
88+
}
89+
90+
// buildExecuteAttributes returns span attributes specific to execute_duration.
91+
func buildExecuteAttributes(pr *tektonv1.PipelineRun) []attribute.KeyValue {
92+
cond := pr.Status.GetCondition(apis.ConditionSucceeded)
93+
success := false
94+
reason := ""
95+
if cond != nil {
96+
reason = cond.Reason
97+
success = cond.Status == corev1.ConditionTrue
98+
}
99+
return []attribute.KeyValue{
100+
tracing.DeliverySuccessKey.Bool(success),
101+
tracing.DeliveryReasonKey.String(reason),
102+
}
103+
}

0 commit comments

Comments
 (0)