Skip to content

Commit a2195fb

Browse files
chore: Fix double delete on kill node thread (#8194)
1 parent 5458a20 commit a2195fb

File tree

1 file changed

+8
-4
lines changed

1 file changed

+8
-4
lines changed

kwok/ec2/ec2.go

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,9 @@ import (
3535
corev1 "k8s.io/api/core/v1"
3636
"k8s.io/apimachinery/pkg/api/equality"
3737
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
38+
"k8s.io/client-go/util/retry"
3839
"k8s.io/client-go/util/workqueue"
40+
"k8s.io/klog/v2"
3941
"k8s.io/utils/clock"
4042
"k8s.io/utils/set"
4143
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -259,12 +261,12 @@ func (c *Client) StartKillNodeThread(ctx context.Context) {
259261
for _, node := range nodes.Items {
260262
id, err := utils.ParseInstanceID(node.Spec.ProviderID)
261263
if err != nil {
262-
log.FromContext(ctx).Error(err, "unable to parse instance id for node %q", node.Name)
264+
log.FromContext(ctx).WithValues("Node", klog.KObj(&node)).Error(err, "unable to parse instance id")
263265
continue
264266
}
265-
if _, ok := c.instances.Load(id); !ok {
267+
if _, ok := c.instances.Load(id); !ok && node.DeletionTimestamp.IsZero() {
266268
if err = c.kubeClient.Delete(ctx, &node); client.IgnoreNotFound(err) != nil {
267-
log.FromContext(ctx).Error(err, "unable to delete node %q due to gone instance", node.Name)
269+
log.FromContext(ctx).WithValues("Node", klog.KObj(&node)).Error(err, "unable to delete due to gone instance")
268270
continue
269271
}
270272
}
@@ -596,7 +598,9 @@ func (c *Client) CreateFleet(ctx context.Context, input *ec2.CreateFleetInput, _
596598
// This is meant to simulate instance startup time
597599
case <-c.clock.After(30 * time.Second):
598600
}
599-
if err := c.kubeClient.Create(launchCtx, c.toNode(ctx, instance)); err != nil {
601+
if err := retry.OnError(retry.DefaultBackoff, func(_ error) bool { return true }, func() error {
602+
return c.kubeClient.Create(launchCtx, c.toNode(ctx, instance))
603+
}); err != nil {
600604
c.instances.Delete(lo.FromPtr(instance.InstanceId))
601605
c.instanceLaunchCancels.Delete(lo.FromPtr(instance.InstanceId))
602606
}

0 commit comments

Comments
 (0)