Skip to content

Commit 01f5ac2

Browse files
ci-operator and claude
authored and committed
feat: add distributed tracing for webhook handling and PipelineRun timing
Add OpenTelemetry distributed tracing to PaC. Spans cover webhook event processing and PipelineRun lifecycle timing. Attribute naming follows OTel semconv where possible (vcs.*, cicd.*, k8s.*), matches Tekton's existing bare attribute names for resource identity, and uses delivery.tekton.dev.* for domain-specific delivery pipeline concepts per OTel reverse-domain naming guidance. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent a387d41 commit 01f5ac2

File tree

14 files changed

+797
-8
lines changed

14 files changed

+797
-8
lines changed

cmd/pipelines-as-code-controller/main.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,9 @@ func main() {
4040
loggerConfigurator := evadapter.NewLoggerConfiguratorFromConfigMap(PACControllerLogKey, loggerConfiguratorOpt)
4141
copts := []evadapter.ConfiguratorOption{
4242
evadapter.WithLoggerConfigurator(loggerConfigurator),
43-
evadapter.WithObservabilityConfigurator(evadapter.NewObservabilityConfiguratorFromConfigMap()),
43+
evadapter.WithObservabilityConfigurator(evadapter.NewObservabilityConfiguratorFromConfigMap(
44+
evadapter.WithObservabilityConfiguratorConfigMapName("pipelines-as-code-config-observability"),
45+
)),
4446
evadapter.WithCloudEventsStatusReporterConfigurator(evadapter.NewCloudEventsReporterConfiguratorFromConfigMap()),
4547
}
4648
// put logger configurator to ctx

config/305-config-observability.yaml

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,4 +50,18 @@ data:
5050
5151
# metrics-export-interval specifies how often metrics are exported.
5252
# Only applicable for grpc and http/protobuf protocols.
53-
# metrics-export-interval: "30s"
53+
# metrics-export-interval: "30s"
54+
55+
# tracing-protocol specifies the trace export protocol.
56+
# Supported values: "grpc", "http/protobuf", "none".
57+
# Default is "none" (tracing disabled).
58+
# tracing-protocol: "none"
59+
60+
# tracing-endpoint specifies the OTLP collector endpoint.
61+
# Required when tracing-protocol is "grpc" or "http/protobuf".
62+
# The OTEL_EXPORTER_OTLP_ENDPOINT env var takes precedence if set.
63+
# tracing-endpoint: "http://otel-collector.observability.svc.cluster.local:4317"
64+
65+
# tracing-sampling-rate controls the fraction of traces sampled.
66+
# 0.0 = none, 1.0 = all. Default is 0 (none).
67+
# tracing-sampling-rate: "1.0"
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
---
2+
title: Distributed Tracing
3+
weight: 5
4+
---
5+
6+
This page describes how to enable OpenTelemetry distributed tracing for Pipelines-as-Code. When enabled, PaC emits trace spans for webhook event processing and PipelineRun lifecycle timing.
7+
8+
## Enabling tracing
9+
10+
The ConfigMap `pipelines-as-code-config-observability` controls tracing configuration. See [config/305-config-observability.yaml](https://github.com/tektoncd/pipelines-as-code/blob/main/config/305-config-observability.yaml) for the full example.
11+
12+
It contains the following tracing fields:
13+
14+
* `tracing-protocol`: Export protocol. Supported values: `grpc`, `http/protobuf`, `none`. Default is `none` (tracing disabled).
15+
* `tracing-endpoint`: OTLP collector endpoint. Required when protocol is not `none`. The `OTEL_EXPORTER_OTLP_ENDPOINT` environment variable takes precedence if set.
16+
* `tracing-sampling-rate`: Fraction of traces to sample, from `0.0` (none) to `1.0` (all). Default is `0`, so set a value above `0` when enabling tracing; otherwise no traces are sampled.
17+
18+
### Example
19+
20+
```yaml
21+
apiVersion: v1
22+
kind: ConfigMap
23+
metadata:
24+
name: pipelines-as-code-config-observability
25+
namespace: pipelines-as-code
26+
data:
27+
tracing-protocol: grpc
28+
tracing-endpoint: "http://otel-collector.observability.svc.cluster.local:4317"
29+
tracing-sampling-rate: "1.0"
30+
```
31+
32+
Changes to the ConfigMap are picked up automatically without restarting the controller. Set `tracing-protocol` to `none` or remove the tracing keys to disable tracing.
33+
34+
## Emitted spans
35+
36+
The controller emits a `PipelinesAsCode:ProcessEvent` span covering the full lifecycle of each webhook event, from receipt through PipelineRun creation. The watcher emits `waitDuration` and `executeDuration` spans for completed PipelineRuns, using the PipelineRun's actual timestamps for accurate wall-clock timing.
37+
38+
## Trace context propagation
39+
40+
When Pipelines-as-Code creates a PipelineRun while a trace context is active, it sets the `tekton.dev/pipelinerunSpanContext` annotation to a JSON-encoded OTel TextMapCarrier containing the W3C `traceparent`, replacing any value already present in the PipelineRun template. PaC tracing works independently: you get PaC spans regardless of whether Tekton Pipelines has tracing enabled.
41+
42+
If Tekton Pipelines is also configured with tracing pointing at the same collector, its reconciler spans appear as children of the PaC span, providing a single end-to-end trace from webhook receipt through task execution. See the [Tekton Pipelines tracing documentation](https://github.com/tektoncd/pipeline/blob/main/docs/developers/tracing.md) for Tekton's independent tracing setup.
43+
44+
## Deploying a trace collector
45+
46+
Pipelines-as-Code exports traces using the standard OpenTelemetry Protocol (OTLP). You need a running OTLP-compatible collector for the `tracing-endpoint` to point to. Common options include:
47+
48+
* [OpenTelemetry Collector](https://opentelemetry.io/docs/collector/) -- the vendor-neutral reference collector
49+
* [Jaeger](https://www.jaegertracing.io/docs/latest/getting-started/) -- supports OTLP ingestion natively since v1.35
50+
51+
Deploying and operating a collector is outside the scope of Pipelines-as-Code. Refer to your organization's observability infrastructure or the links above for setup instructions.

go.mod

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,9 @@ require (
3030
gitlab.com/gitlab-org/api/client-go v1.46.0
3131
go.opentelemetry.io/otel v1.42.0
3232
go.opentelemetry.io/otel/metric v1.42.0
33+
go.opentelemetry.io/otel/sdk v1.42.0
3334
go.opentelemetry.io/otel/sdk/metric v1.42.0
35+
go.opentelemetry.io/otel/trace v1.42.0
3436
go.uber.org/zap v1.27.1
3537
golang.org/x/exp v0.0.0-20260312153236-7ab1446f8b90
3638
golang.org/x/oauth2 v0.36.0
@@ -91,8 +93,6 @@ require (
9193
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.42.0 // indirect
9294
go.opentelemetry.io/otel/exporters/prometheus v0.64.0 // indirect
9395
go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.42.0 // indirect
94-
go.opentelemetry.io/otel/sdk v1.42.0 // indirect
95-
go.opentelemetry.io/otel/trace v1.42.0 // indirect
9696
go.opentelemetry.io/proto/otlp v1.10.0 // indirect
9797
go.uber.org/atomic v1.11.0 // indirect
9898
go.yaml.in/yaml/v2 v2.4.4 // indirect

pkg/adapter/adapter.go

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@ import (
2323
"github.com/openshift-pipelines/pipelines-as-code/pkg/provider/gitea"
2424
"github.com/openshift-pipelines/pipelines-as-code/pkg/provider/github"
2525
"github.com/openshift-pipelines/pipelines-as-code/pkg/provider/gitlab"
26+
"github.com/openshift-pipelines/pipelines-as-code/pkg/tracing"
27+
"go.opentelemetry.io/otel"
28+
"go.opentelemetry.io/otel/propagation"
29+
"go.opentelemetry.io/otel/trace"
2630
"go.uber.org/zap"
2731
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2832
"knative.dev/eventing/pkg/adapter/v2"
@@ -191,6 +195,19 @@ func (l listener) handleEvent(ctx context.Context) http.HandlerFunc {
191195
}
192196
gitProvider.SetPacInfo(&pacInfo)
193197

198+
// Extract inbound trace context from request headers for distributed tracing
199+
tracedCtx := otel.GetTextMapPropagator().Extract(ctx, propagation.HeaderCarrier(request.Header))
200+
201+
// Start a span for webhook handling
202+
tracer := otel.Tracer(tracing.TracerName)
203+
tracedCtx, span := tracer.Start(tracedCtx, "PipelinesAsCode:ProcessEvent",
204+
trace.WithSpanKind(trace.SpanKindServer),
205+
)
206+
207+
span.SetAttributes(
208+
tracing.VCSProviderName.String(gitProvider.GetConfig().Name),
209+
)
210+
194211
s := sinker{
195212
run: l.run,
196213
vcx: gitProvider,
@@ -206,8 +223,10 @@ func (l listener) handleEvent(ctx context.Context) http.HandlerFunc {
206223
localRequest := request.Clone(request.Context())
207224

208225
go func() {
209-
err := s.processEvent(ctx, localRequest)
226+
defer span.End()
227+
err := s.processEvent(tracedCtx, localRequest)
210228
if err != nil {
229+
span.RecordError(err)
211230
logger.Errorf("an error occurred: %v", err)
212231
}
213232
}()

pkg/adapter/sinker.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ import (
1414
"github.com/openshift-pipelines/pipelines-as-code/pkg/pipelineascode"
1515
"github.com/openshift-pipelines/pipelines-as-code/pkg/provider"
1616
"github.com/openshift-pipelines/pipelines-as-code/pkg/provider/status"
17+
"github.com/openshift-pipelines/pipelines-as-code/pkg/tracing"
18+
"go.opentelemetry.io/otel/trace"
1719
"go.uber.org/zap"
1820
)
1921

@@ -117,6 +119,10 @@ func (s *sinker) processEvent(ctx context.Context, request *http.Request) error
117119
}
118120
}
119121

122+
// Enrich span with VCS attributes — for incoming events these are
123+
// pre-populated; for webhook events ParsePayload filled them in.
124+
setVCSSpanAttributes(ctx, s.event)
125+
120126
p := pipelineascode.NewPacs(s.event, s.vcx, s.run, s.pacInfo, s.kint, s.logger, s.globalRepo)
121127
return p.Run(ctx)
122128
}
@@ -174,3 +180,18 @@ func (s *sinker) createSkipCIStatus(ctx context.Context) error {
174180

175181
return nil
176182
}
183+
184+
// setVCSSpanAttributes enriches the current span with VCS attributes from the event.
185+
func setVCSSpanAttributes(ctx context.Context, event *info.Event) {
186+
span := trace.SpanFromContext(ctx)
187+
if !span.IsRecording() {
188+
return
189+
}
190+
span.SetAttributes(tracing.VCSEventType.String(event.EventType))
191+
if event.URL != "" {
192+
span.SetAttributes(tracing.VCSRepositoryURLFull.String(event.URL))
193+
}
194+
if event.SHA != "" {
195+
span.SetAttributes(tracing.VCSRefHeadRevision.String(event.SHA))
196+
}
197+
}

pkg/apis/pipelinesascode/keys/keys.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,9 @@ const (
6868
GithubApplicationID = "github-application-id"
6969
GithubPrivateKey = "github-private-key"
7070
ResultsRecordSummary = "results.tekton.dev/recordSummaryAnnotations"
71+
72+
// SpanContextAnnotation is the annotation key for propagating span context to Tekton for distributed tracing.
73+
SpanContextAnnotation = "tekton.dev/pipelinerunSpanContext"
7174
)
7275

7376
var ParamsRe = regexp.MustCompile(`{{([^}]{2,})}}`)

pkg/kubeinteraction/labels.go

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package kubeinteraction
22

33
import (
4+
"context"
5+
"encoding/json"
46
"fmt"
57
"strconv"
68

@@ -12,6 +14,9 @@ import (
1214
"github.com/openshift-pipelines/pipelines-as-code/pkg/params/info"
1315
"github.com/openshift-pipelines/pipelines-as-code/pkg/params/versiondata"
1416
tektonv1 "github.com/tektoncd/pipeline/pkg/apis/pipeline/v1"
17+
"go.opentelemetry.io/otel"
18+
"go.opentelemetry.io/otel/propagation"
19+
"knative.dev/pkg/logging"
1520
)
1621

1722
const (
@@ -21,11 +26,15 @@ const (
2126
StateFailed = "failed"
2227
)
2328

24-
func AddLabelsAndAnnotations(event *info.Event, pipelineRun *tektonv1.PipelineRun, repo *apipac.Repository, providerConfig *info.ProviderConfig, paramsRun *params.Run) error {
29+
func AddLabelsAndAnnotations(ctx context.Context, event *info.Event, pipelineRun *tektonv1.PipelineRun, repo *apipac.Repository, providerConfig *info.ProviderConfig, paramsRun *params.Run) error {
2530
if event == nil {
2631
return fmt.Errorf("event should not be nil")
2732
}
2833
paramsinfo := paramsRun.Info
34+
35+
// Inject span context for distributed tracing
36+
carrier := propagation.MapCarrier{}
37+
otel.GetTextMapPropagator().Inject(ctx, carrier)
2938
// Add labels on the soon-to-be created pipelinerun so UI/CLI can easily
3039
// query them.
3140
labels := map[string]string{
@@ -60,6 +69,18 @@ func AddLabelsAndAnnotations(event *info.Event, pipelineRun *tektonv1.PipelineRu
6069
paramsinfo.Controller.Name, paramsinfo.Controller.Configmap, paramsinfo.Controller.Secret, paramsinfo.Controller.GlobalRepository),
6170
}
6271

72+
// Add span context for distributed tracing if available
73+
if len(carrier) > 0 {
74+
if jsonBytes, err := json.Marshal(carrier); err != nil {
75+
logging.FromContext(ctx).Errorf("failed to marshal span context carrier: %v", err)
76+
} else {
77+
if existing := pipelineRun.GetAnnotations()[keys.SpanContextAnnotation]; existing != "" {
78+
logging.FromContext(ctx).Warnf("overwriting pre-existing %s annotation on PipelineRun template; honoring initiating event trace context", keys.SpanContextAnnotation)
79+
}
80+
annotations[keys.SpanContextAnnotation] = string(jsonBytes)
81+
}
82+
}
83+
6384
if event.PullRequestNumber != 0 {
6485
labels[keys.PullRequest] = strconv.Itoa(event.PullRequestNumber)
6586
annotations[keys.PullRequest] = strconv.Itoa(event.PullRequestNumber)

pkg/kubeinteraction/labels_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package kubeinteraction
22

33
import (
4+
"context"
45
"fmt"
56
"testing"
67

@@ -69,7 +70,7 @@ func TestAddLabelsAndAnnotations(t *testing.T) {
6970
Controller: tt.args.controllerInfo,
7071
},
7172
}
72-
err := AddLabelsAndAnnotations(tt.args.event, tt.args.pipelineRun, tt.args.repo, &info.ProviderConfig{}, paramsRun)
73+
err := AddLabelsAndAnnotations(context.Background(), tt.args.event, tt.args.pipelineRun, tt.args.repo, &info.ProviderConfig{}, paramsRun)
7374
assert.NilError(t, err)
7475
assert.Equal(t, tt.args.pipelineRun.Labels[keys.URLOrg], tt.args.event.Organization, "'%s' != %s",
7576
tt.args.pipelineRun.Labels[keys.URLOrg], tt.args.event.Organization)

pkg/pipelineascode/pipelineascode.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ func (p *PacRun) startPR(ctx context.Context, match matcher.Match) (*tektonv1.Pi
216216
)
217217

218218
// Add labels and annotations to pipelinerun
219-
err := kubeinteraction.AddLabelsAndAnnotations(p.event, match.PipelineRun, match.Repo, p.vcx.GetConfig(), p.run)
219+
err := kubeinteraction.AddLabelsAndAnnotations(ctx, p.event, match.PipelineRun, match.Repo, p.vcx.GetConfig(), p.run)
220220
if err != nil {
221221
p.logger.Errorf("Error adding labels/annotations to PipelineRun '%s' in namespace '%s': %v", match.PipelineRun.GetName(), match.Repo.GetNamespace(), err)
222222
} else {

0 commit comments

Comments
 (0)