Skip to content

Commit b69e69a

Browse files
committed
ssa: fix timeout error not mentioning object
Signed-off-by: Matheus Pimenta <matheuscscp@gmail.com>
1 parent f6b5193 commit b69e69a

2 files changed

Lines changed: 95 additions & 7 deletions

File tree

ssa/manager_wait.go

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"errors"
2323
"fmt"
2424
"strings"
25+
"sync"
2526
"time"
2627

2728
apierrors "k8s.io/apimachinery/pkg/api/errors"
@@ -96,12 +97,14 @@ func (m *ResourceManager) WaitForSetWithContext(ctx context.Context, set object.
9697
}
9798
eventsChan := m.poller.Poll(ctx, set, pollingOpts)
9899

100+
var mu sync.Mutex
99101
lastStatus := make(map[object.ObjMetadata]*event.ResourceStatus)
100102

101103
done := statusCollector.ListenWithObserver(eventsChan, collector.ObserverFunc(
102104
func(statusCollector *collector.ResourceStatusCollector, e event.Event) {
103105
var rss []*event.ResourceStatus
104106
counts := make(map[status.Status]int)
107+
mu.Lock()
105108
for _, rs := range statusCollector.ResourceStatuses {
106109
if rs == nil {
107110
continue
@@ -117,6 +120,7 @@ func (m *ResourceManager) WaitForSetWithContext(ctx context.Context, set object.
117120
rss = append(rss, rs)
118121
counts[effectiveStatus]++
119122
}
123+
mu.Unlock()
120124

121125
// If only Failed or Current statuses are present,
122126
// we can consider this a terminal state. Detecting
@@ -152,10 +156,7 @@ func (m *ResourceManager) WaitForSetWithContext(ctx context.Context, set object.
152156
return ctx.Err()
153157
}
154158

155-
if statusCollector.Error != nil {
156-
return statusCollector.Error
157-
}
158-
159+
mu.Lock()
159160
var errs []string
160161
for id, rs := range statusCollector.ResourceStatuses {
161162
switch {
@@ -172,15 +173,17 @@ func (m *ResourceManager) WaitForSetWithContext(ctx context.Context, set object.
172173
errors.Is(ctx.Err(), context.DeadlineExceeded) &&
173174
lastStatus[id].Status != status.CurrentStatus:
174175
var builder strings.Builder
175-
builder.WriteString(fmt.Sprintf("%s status: '%s'",
176-
utils.FmtObjMetadata(rs.Identifier), lastStatus[id].Status))
176+
fmt.Fprintf(&builder, "%s status: '%s'",
177+
utils.FmtObjMetadata(rs.Identifier), lastStatus[id].Status)
177178
if rs.Error != nil {
178-
builder.WriteString(fmt.Sprintf(": %s", rs.Error))
179+
fmt.Fprintf(&builder, ": %s", rs.Error)
179180
}
180181
errs = append(errs, builder.String())
181182
}
182183
}
183184

185+
mu.Unlock()
186+
184187
if len(errs) > 0 {
185188
msg := "failed early due to stalled resources"
186189
if errors.Is(ctx.Err(), context.DeadlineExceeded) {
@@ -189,6 +192,10 @@ func (m *ResourceManager) WaitForSetWithContext(ctx context.Context, set object.
189192
return fmt.Errorf("%s: [%s]", msg, strings.Join(errs, ", "))
190193
}
191194

195+
if statusCollector.Error != nil {
196+
return statusCollector.Error
197+
}
198+
192199
return nil
193200
}
194201

ssa/manager_wait_test.go

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -411,6 +411,87 @@ func TestWaitForSet_RateLimiterError(t *testing.T) {
411411
}
412412
}
413413

414+
func TestWaitForSet_RateLimiterErrorIncludesObjectNames(t *testing.T) {
415+
g := NewWithT(t)
416+
417+
id := generateName("rlname")
418+
419+
// Create real resources on the cluster that will reach Current status.
420+
objects, err := readManifest("testdata/test14.yaml", id)
421+
g.Expect(err).NotTo(HaveOccurred())
422+
423+
manager.SetOwnerLabels(objects, "infra", "default")
424+
cs, err := manager.ApplyAllStaged(context.Background(), objects, DefaultApplyOptions())
425+
g.Expect(err).NotTo(HaveOccurred())
426+
427+
// Add a non-existent Deployment to the wait set. This simulates the
428+
// real-world scenario where a health check includes a resource that
429+
// doesn't exist on the cluster (e.g. "does-not-exist").
430+
nonExistent := object.ObjMetadata{
431+
Name: "does-not-exist",
432+
Namespace: id,
433+
GroupKind: schema.GroupKind{Group: "apps", Kind: "Deployment"},
434+
}
435+
waitSet := append(cs.ToObjMetadataSet(), nonExistent)
436+
437+
// Use a custom status reader that simulates the rate limiter firing
438+
// at the context deadline. In production, the Go rate limiter fires
439+
// preemptively when it detects that waiting would exceed the deadline.
440+
// We simulate this by sleeping past the deadline before returning the
441+
// rate limiter error, ensuring both statusCollector.Error is set AND
442+
// ctx.Err() is context.DeadlineExceeded.
443+
timeout := 500 * time.Millisecond
444+
start := time.Now()
445+
manager.poller = polling.NewStatusPoller(manager.client, restMapper, polling.Options{
446+
CustomStatusReaders: []engine.StatusReader{
447+
kstatusreaders.NewGenericStatusReader(restMapper,
448+
func(u *unstructured.Unstructured) (*status.Result, error) {
449+
if time.Since(start) > 300*time.Millisecond {
450+
// Sleep past the context deadline to ensure
451+
// ctx.Err() == context.DeadlineExceeded when
452+
// WaitForSetWithContext processes the error.
453+
remaining := timeout - time.Since(start)
454+
if remaining > 0 {
455+
time.Sleep(remaining + 50*time.Millisecond)
456+
}
457+
return nil, fmt.Errorf("rate: Wait(n=1) would exceed context deadline")
458+
}
459+
return status.Compute(u)
460+
},
461+
),
462+
},
463+
})
464+
defer func() {
465+
manager.poller = poller
466+
}()
467+
468+
err = manager.WaitForSet(waitSet, WaitOptions{
469+
Interval: 100 * time.Millisecond,
470+
Timeout: timeout,
471+
})
472+
473+
g.Expect(err).To(HaveOccurred())
474+
errMsg := err.Error()
475+
t.Logf("error message: %s", errMsg)
476+
477+
// The error must include the name of the non-existent resource.
478+
// Before the fix, statusCollector.Error was returned directly without
479+
// any object context, producing: "rate: Wait(n=1) would exceed context deadline".
480+
// After the fix, the per-object status loop runs first, producing:
481+
// "timeout waiting for: [Deployment/id/does-not-exist status: 'NotFound']".
482+
g.Expect(errMsg).To(ContainSubstring("does-not-exist"),
483+
"error must contain the name of the non-existent resource")
484+
g.Expect(errMsg).To(ContainSubstring("timeout waiting for"),
485+
"error must indicate it was a timeout")
486+
487+
// ConfigMaps that reached Current status should NOT appear in the error.
488+
for i := 1; i <= 4; i++ {
489+
cmName := fmt.Sprintf("%s-cm%d", id, i)
490+
g.Expect(errMsg).NotTo(ContainSubstring(cmName),
491+
"error should not contain ConfigMap %s which reached Current status", cmName)
492+
}
493+
}
494+
414495
func TestWaitForSet_ErrorOnReaderError(t *testing.T) {
415496
g := NewWithT(t)
416497

0 commit comments

Comments
 (0)