Skip to content

Commit 049275e

Browse files
committed
perf(informer): add TransformFuncs to reduce cache memory usage
Add cache transform functions for the Repository and PipelineRun informers, stripping large unnecessary fields before objects enter the informer cache. Inspired by tektoncd/pipeline#9316. For Repository objects, ManagedFields, Annotations and Status are stripped. The reconciler never reads Repository annotations or Status from the lister; Status is always fetched fresh via direct API call before updates. For PipelineRun objects, ManagedFields and large Spec and Status fields are stripped. The watcher only needs Annotations, Spec.Status (pending check), Status.Conditions, and timing fields. All other data is fetched directly from the API when needed. Benchmark results with production-realistic objects show an 89% JSON size reduction for Repository objects (5.6KB to 600B) and 94% for PipelineRun objects (10.7KB to 677B), with corresponding 8-10x reductions in heap allocation per cached object. Signed-off-by: Akshay Pant <akpant@redhat.com> Assisted-by: Claude <noreply@anthropic.com>
1 parent 738cc02 commit 049275e

File tree

4 files changed

+791
-1
lines changed

4 files changed

+791
-1
lines changed
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
// Package transform provides cache transform functions for reducing memory
2+
// usage in the PAC watcher informer caches.
3+
//
4+
// Transform functions are applied to objects before they are stored in the
5+
// informer cache, allowing us to strip large, unnecessary fields while
6+
// preserving the data needed for reconciliation.
7+
//
8+
// DEVELOPER WARNING:
9+
// If you add new reconciliation logic that reads a field from cached objects
10+
// (via listers), you MUST verify that field is not stripped by these transforms.
11+
// Fields stripped from cached objects will be nil/empty even though they exist
12+
// in etcd. If you need a stripped field, fetch the full object via the API
13+
// client instead of the lister.
14+
package transform
15+
16+
import (
17+
pacv1alpha1 "github.com/openshift-pipelines/pipelines-as-code/pkg/apis/pipelinesascode/v1alpha1"
18+
tektonv1 "github.com/tektoncd/pipeline/pkg/apis/pipeline/v1"
19+
"k8s.io/client-go/tools/cache"
20+
)
21+
22+
// RepositoryForCache strips fields from Repository objects before they are
23+
// stored in the informer cache to reduce memory usage.
24+
//
25+
// Fields stripped:
26+
// - ManagedFields: written by the API server, not needed for reconciliation
27+
// - Annotations: no reconciler logic reads Repository annotations from the
28+
// lister; the largest annotation is kubectl.kubernetes.io/last-applied-configuration
29+
// - Status: the reconciler always fetches Repository.Status via a direct API
30+
// call before updating it; it is never read from the lister
31+
func RepositoryForCache(obj any) (any, error) {
32+
if tombstone, ok := obj.(cache.DeletedFinalStateUnknown); ok {
33+
transformed, err := RepositoryForCache(tombstone.Obj)
34+
if err != nil {
35+
return obj, nil //nolint:nilerr // return original on error for graceful degradation
36+
}
37+
return cache.DeletedFinalStateUnknown{Key: tombstone.Key, Obj: transformed}, nil
38+
}
39+
40+
repo, ok := obj.(*pacv1alpha1.Repository)
41+
if !ok {
42+
return obj, nil
43+
}
44+
45+
repo.ManagedFields = nil
46+
repo.Annotations = nil
47+
repo.Status = nil
48+
49+
return repo, nil
50+
}
51+
52+
// PipelineRunForCache strips fields from PipelineRun objects before they are
53+
// stored in the informer cache to reduce memory usage.
54+
//
55+
// Fields the PAC watcher reads from cached PipelineRuns:
56+
// - ObjectMeta: name, namespace, labels, annotations (PAC state/repo keys),
57+
// finalizers, deletionTimestamp
58+
// - Spec.Status: checked for PipelineRunSpecStatusPending
59+
// - Status.Conditions: checked for completion state and reason
60+
// - Status.StartTime, Status.CompletionTime: used for metrics
61+
//
62+
// All other Spec and Status fields are stripped. When the reconciler needs
63+
// the full object (e.g. postFinalStatus, GetStatusFromTaskStatusOrFromAsking),
64+
// it fetches it directly from the API server.
65+
func PipelineRunForCache(obj any) (any, error) {
66+
if tombstone, ok := obj.(cache.DeletedFinalStateUnknown); ok {
67+
transformed, err := PipelineRunForCache(tombstone.Obj)
68+
if err != nil {
69+
return obj, nil //nolint:nilerr // return original on error for graceful degradation
70+
}
71+
return cache.DeletedFinalStateUnknown{Key: tombstone.Key, Obj: transformed}, nil
72+
}
73+
74+
pr, ok := obj.(*tektonv1.PipelineRun)
75+
if !ok {
76+
return obj, nil
77+
}
78+
79+
pr.ManagedFields = nil
80+
81+
// Strip large Spec fields — watcher only checks Spec.Status (pending state)
82+
pr.Spec.PipelineRef = nil
83+
pr.Spec.PipelineSpec = nil
84+
pr.Spec.Params = nil
85+
pr.Spec.Workspaces = nil
86+
pr.Spec.TaskRunSpecs = nil
87+
pr.Spec.TaskRunTemplate = tektonv1.PipelineTaskRunTemplate{}
88+
pr.Spec.Timeouts = nil
89+
90+
// Strip large Status fields — watcher only reads Conditions, StartTime, CompletionTime
91+
pr.Status.PipelineSpec = nil
92+
pr.Status.ChildReferences = nil
93+
pr.Status.Provenance = nil
94+
pr.Status.SpanContext = nil
95+
96+
return pr, nil
97+
}

0 commit comments

Comments
 (0)