Skip to content

Commit 9d5e582

Browse files
Add OTEL tracing to policy implementation
This introduces OpenTelemetry (OTEL) tracing to the policy implementation. With this change, tracing spans are picked up from whoever invoked Terraform and passed through to the policy evaluation, and, via the callback server, back from the policy evaluation.
1 parent d996cf0 commit 9d5e582

21 files changed

Lines changed: 694 additions & 34 deletions

internal/backend/local/backend_apply.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ import (
1212

1313
"github.com/hashicorp/hcl/v2"
1414
"github.com/zclconf/go-cty/cty"
15+
"go.opentelemetry.io/otel/attribute"
16+
"go.opentelemetry.io/otel/trace"
1517

1618
"github.com/hashicorp/terraform/internal/addrs"
1719
"github.com/hashicorp/terraform/internal/backend/backendrun"
@@ -93,6 +95,7 @@ func (b *Local) opApply(
9395

9496
var plan *plans.Plan
9597
combinedPlanApply := false
98+
lr.Core.SetTracingContext(stopCtx)
9699
// If we weren't given a plan, then we refresh/plan
97100
if op.PlanFile == nil {
98101
// set the policy client to nil for the plan preceding apply
@@ -445,7 +448,22 @@ func (b *Local) opApply(
445448
diags = diags.Append(applyDiags)
446449

447450
// Print the policy results we found during apply
451+
policyResultCount := 0
452+
if plan.PolicyResults != nil {
453+
policyResultCount = plan.PolicyResults.Len()
454+
}
455+
var polRenderSpan trace.Span
456+
polRenderSpanEnd := func() {}
457+
if policyResultCount > 0 {
458+
_, polRenderSpan = tracer().Start(stopCtx, "terraform.local.apply.render_policy_results",
459+
trace.WithAttributes(
460+
attribute.Int("apply.policy_results", policyResultCount),
461+
),
462+
)
463+
polRenderSpanEnd = func() { polRenderSpan.End() }
464+
}
448465
op.View.PolicyResults(plan.PolicyResults, nil)
466+
polRenderSpanEnd()
449467

450468
// Even on error with an empty state, the state value should not be nil.
451469
// Return early here to prevent corrupting any existing state.

internal/backend/local/backend_plan.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@ import (
99
"io"
1010
"log"
1111

12+
"go.opentelemetry.io/otel/attribute"
13+
"go.opentelemetry.io/otel/trace"
14+
1215
"github.com/hashicorp/terraform/internal/backend/backendrun"
1316
"github.com/hashicorp/terraform/internal/genconfig"
1417
"github.com/hashicorp/terraform/internal/logging"
@@ -118,6 +121,7 @@ func (b *Local) opPlan(
118121
defer logging.PanicHandler()
119122
defer close(doneCh)
120123
log.Printf("[INFO] backend/local: plan calling Plan")
124+
lr.Core.SetTracingContext(stopCtx)
121125
plan, planDiags = lr.Core.Plan(lr.Config, lr.InputState, lr.PlanOpts)
122126
}()
123127

@@ -223,7 +227,22 @@ func (b *Local) opPlan(
223227
op.View.Plan(plan, schemas)
224228

225229
// Report all policy results that may have accumulated during the plan
230+
policyResultCount := 0
231+
if plan.PolicyResults != nil {
232+
policyResultCount = plan.PolicyResults.Len()
233+
}
234+
var polRenderSpan trace.Span
235+
polRenderSpanEnd := func() {}
236+
if policyResultCount > 0 {
237+
_, polRenderSpan = tracer().Start(stopCtx, "terraform.local.plan.render_policy_results",
238+
trace.WithAttributes(
239+
attribute.Int("plan.policy_results", policyResultCount),
240+
),
241+
)
242+
polRenderSpanEnd = func() { polRenderSpan.End() }
243+
}
226244
op.View.PolicyResults(plan.PolicyResults, nil)
245+
polRenderSpanEnd()
227246

228247
// If we've accumulated any diagnostics along the way then we'll show them
229248
// here just before we show the summary and next steps. This can potentially

internal/backend/local/backend_refresh.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ func (b *Local) opRefresh(
9191
go func() {
9292
defer logging.PanicHandler()
9393
defer close(doneCh)
94+
lr.Core.SetTracingContext(stopCtx)
9495
newState, refreshDiags = lr.Core.Refresh(lr.Config, lr.InputState, lr.PlanOpts)
9596
log.Printf("[INFO] backend/local: refresh calling Refresh")
9697
}()
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// Copyright IBM Corp. 2014, 2026
2+
// SPDX-License-Identifier: BUSL-1.1
3+
4+
package local
5+
6+
import (
7+
"go.opentelemetry.io/otel"
8+
"go.opentelemetry.io/otel/trace"
9+
)
10+
11+
// tracer returns the OpenTelemetry tracer for the local backend.
12+
//
13+
// Resolved lazily on every call so the global TracerProvider installed by
14+
// the CLI's openTelemetryInit (which runs after this package's init) is
15+
// reflected in the tracer used at runtime.
16+
func tracer() trace.Tracer {
17+
return otel.Tracer("github.com/hashicorp/terraform/internal/backend/local")
18+
}

internal/command/apply.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
package command
55

66
import (
7-
"context"
87
"fmt"
98
"strings"
109

@@ -105,7 +104,7 @@ func (c *ApplyCommand) Run(rawArgs []string) int {
105104
}
106105

107106
if len(args.PolicyPaths) > 0 {
108-
client, policyDiags, stopClient := c.PolicyClient(context.Background(), args.PolicyPaths)
107+
client, policyDiags, stopClient := c.PolicyClient(c.CommandContext(), args.PolicyPaths)
109108
// if there has been any errors when setting up the policy client, we log them but
110109
// we still proceed with the operation, as a failure to set up the policy client
111110
// should not prevent the apply operation from running

internal/command/meta.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,10 @@ import (
2020

2121
"github.com/hashicorp/cli"
2222
plugin "github.com/hashicorp/go-plugin"
23-
"github.com/hashicorp/terraform-svchost/disco"
2423
"github.com/mitchellh/colorstring"
2524

25+
"github.com/hashicorp/terraform-svchost/disco"
26+
2627
"github.com/hashicorp/terraform/internal/addrs"
2728
"github.com/hashicorp/terraform/internal/backend"
2829
"github.com/hashicorp/terraform/internal/backend/backendrun"
@@ -497,7 +498,7 @@ func (m *Meta) RunOperation(b backendrun.OperationsBackend, opReq *backendrun.Op
497498
opReq.ConfigDir = m.normalizePath(opReq.ConfigDir)
498499
}
499500

500-
op, err := b.Operation(context.Background(), opReq)
501+
op, err := b.Operation(m.CommandContext(), opReq)
501502
if err != nil {
502503
return nil, fmt.Errorf("error starting operation: %s", err)
503504
}

internal/command/plan.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
package command
55

66
import (
7-
"context"
87
"fmt"
98
"strings"
109

@@ -89,7 +88,7 @@ func (c *PlanCommand) Run(rawArgs []string) int {
8988
}
9089

9190
if len(args.PolicyPaths) > 0 {
92-
client, policyDiags, stopClient := c.PolicyClient(context.Background(), args.PolicyPaths)
91+
client, policyDiags, stopClient := c.PolicyClient(c.CommandContext(), args.PolicyPaths)
9392
// if there has been any errors when setting up the policy client, we log them but
9493
// we still proceed with the operation, as a failure to set up the policy client
9594
// should not prevent the plan operation from running

internal/policy/callback/callback.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,16 @@
44
package callback
55

66
import (
7+
"context"
78
"sync"
89
"sync/atomic"
910

1011
"github.com/zclconf/go-cty/cty"
1112
)
1213

1314
type Functions struct {
14-
GetResources func(resource string, attrs cty.Value) ([]cty.Value, error)
15-
GetDataSource func(datasource string, attrs cty.Value) (cty.Value, error)
15+
GetResources func(ctx context.Context, resource string, attrs cty.Value) ([]cty.Value, error)
16+
GetDataSource func(ctx context.Context, datasource string, attrs cty.Value) (cty.Value, error)
1617
}
1718

1819
// Registry is an interface for managing callback functions for resources and

internal/policy/callback/server.go

Lines changed: 59 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@ import (
99

1010
"github.com/zclconf/go-cty/cty"
1111
"github.com/zclconf/go-cty/cty/msgpack"
12+
"go.opentelemetry.io/otel"
13+
"go.opentelemetry.io/otel/attribute"
14+
"go.opentelemetry.io/otel/codes"
15+
"go.opentelemetry.io/otel/trace"
1216
"google.golang.org/grpc"
1317

1418
"github.com/hashicorp/terraform/internal/policy/proto"
@@ -18,59 +22,104 @@ var (
1822
_ proto.CallbackServiceServer = (*Server)(nil)
1923
)
2024

25+
// tracer is resolved lazily so the global TracerProvider installed by
26+
// openTelemetryInit (which runs after package init) is reflected.
27+
func tracer() trace.Tracer {
28+
return otel.Tracer("github.com/hashicorp/terraform/internal/policy/callback")
29+
}
30+
2131
type Server struct {
2232
ID uint32
2333
Registry Registry
2434
Grpc *grpc.Server
2535
proto.UnimplementedCallbackServiceServer
2636
}
2737

28-
func (s *Server) GetResources(_ context.Context, request *proto.GetResourcesRequest) (*proto.GetResourcesResponse, error) {
38+
func (s *Server) GetResources(ctx context.Context, request *proto.GetResourcesRequest) (*proto.GetResourcesResponse, error) {
39+
ctx, span := tracer().Start(ctx, "policy.callback.server.get_resources",
40+
trace.WithAttributes(
41+
attribute.String("policy.callback.resource_type", request.Type),
42+
attribute.Int64("policy.evaluation.id", int64(request.EvaluationRequestId)),
43+
),
44+
)
45+
defer span.End()
46+
2947
attrs, err := msgpack.Unmarshal(request.Attributes, cty.DynamicPseudoType)
3048
if err != nil {
31-
return nil, fmt.Errorf("failed to unserialize attributes: %w", err)
49+
err = fmt.Errorf("failed to unserialize attributes: %w", err)
50+
span.RecordError(err)
51+
span.SetStatus(codes.Error, err.Error())
52+
return nil, err
3253
}
3354
functions, ok := s.Registry.Get(request.EvaluationRequestId)
3455
if !ok {
35-
return nil, fmt.Errorf("no callback registered for ID %d (request type: %s)", request.EvaluationRequestId, request.Type)
56+
err := fmt.Errorf("no callback registered for ID %d (request type: %s)", request.EvaluationRequestId, request.Type)
57+
span.RecordError(err)
58+
span.SetStatus(codes.Error, err.Error())
59+
return nil, err
3660
}
37-
resources, err := functions.GetResources(request.Type, attrs)
61+
resources, err := functions.GetResources(ctx, request.Type, attrs)
3862
if err != nil {
63+
span.RecordError(err)
64+
span.SetStatus(codes.Error, err.Error())
3965
return nil, err
4066
}
4167

4268
results := make([][]byte, 0, len(resources))
4369
for _, resource := range resources {
4470
result, err := msgpack.Marshal(resource, cty.DynamicPseudoType)
4571
if err != nil {
46-
return nil, fmt.Errorf("failed to serialize resource: %w", err)
72+
err = fmt.Errorf("failed to serialize resource: %w", err)
73+
span.RecordError(err)
74+
span.SetStatus(codes.Error, err.Error())
75+
return nil, err
4776
}
4877
results = append(results, result)
4978
}
5079

80+
span.SetAttributes(attribute.Int("policy.callback.results.count", len(results)))
5181
return &proto.GetResourcesResponse{
5282
Results: results,
5383
}, nil
5484
}
5585

56-
func (s *Server) GetDataSource(_ context.Context, request *proto.GetDataSourceRequest) (*proto.GetDataSourceResponse, error) {
86+
func (s *Server) GetDataSource(ctx context.Context, request *proto.GetDataSourceRequest) (*proto.GetDataSourceResponse, error) {
87+
ctx, span := tracer().Start(ctx, "policy.callback.server.get_datasource",
88+
trace.WithAttributes(
89+
attribute.String("policy.callback.datasource_type", request.Type),
90+
attribute.Int64("policy.evaluation.id", int64(request.EvaluationRequestId)),
91+
),
92+
)
93+
defer span.End()
94+
5795
config, err := msgpack.Unmarshal(request.Config, cty.DynamicPseudoType)
5896
if err != nil {
59-
return nil, fmt.Errorf("failed to unserialize config: %w", err)
97+
err = fmt.Errorf("failed to unserialize config: %w", err)
98+
span.RecordError(err)
99+
span.SetStatus(codes.Error, err.Error())
100+
return nil, err
60101
}
61102

62103
functions, ok := s.Registry.Get(request.EvaluationRequestId)
63104
if !ok {
64-
return nil, fmt.Errorf("no callback registered for ID %d (request type: %s)", request.EvaluationRequestId, request.Type)
105+
err := fmt.Errorf("no callback registered for ID %d (request type: %s)", request.EvaluationRequestId, request.Type)
106+
span.RecordError(err)
107+
span.SetStatus(codes.Error, err.Error())
108+
return nil, err
65109
}
66-
datasource, err := functions.GetDataSource(request.Type, config)
110+
datasource, err := functions.GetDataSource(ctx, request.Type, config)
67111
if err != nil {
112+
span.RecordError(err)
113+
span.SetStatus(codes.Error, err.Error())
68114
return nil, err
69115
}
70116

71117
result, err := msgpack.Marshal(datasource, cty.DynamicPseudoType)
72118
if err != nil {
73-
return nil, fmt.Errorf("failed to serialize datasource: %w", err)
119+
err = fmt.Errorf("failed to serialize datasource: %w", err)
120+
span.RecordError(err)
121+
span.SetStatus(codes.Error, err.Error())
122+
return nil, err
74123
}
75124

76125
return &proto.GetDataSourceResponse{

0 commit comments

Comments
 (0)