@@ -35,7 +35,9 @@ import (
3535 corev1 "k8s.io/api/core/v1"
3636 "k8s.io/apimachinery/pkg/api/equality"
3737 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
38+ "k8s.io/client-go/util/retry"
3839 "k8s.io/client-go/util/workqueue"
40+ "k8s.io/klog/v2"
3941 "k8s.io/utils/clock"
4042 "k8s.io/utils/set"
4143 "sigs.k8s.io/controller-runtime/pkg/client"
@@ -259,12 +261,12 @@ func (c *Client) StartKillNodeThread(ctx context.Context) {
259261 for _ , node := range nodes .Items {
260262 id , err := utils .ParseInstanceID (node .Spec .ProviderID )
261263 if err != nil {
262- log .FromContext (ctx ).Error (err , "unable to parse instance id for node %q" , node . Name )
264+ log .FromContext (ctx ).WithValues ( "Node" , klog . KObj ( & node )). Error (err , "unable to parse instance id" )
263265 continue
264266 }
265- if _ , ok := c .instances .Load (id ); ! ok {
267+ if _ , ok := c .instances .Load (id ); ! ok && node . DeletionTimestamp . IsZero () {
266268 if err = c .kubeClient .Delete (ctx , & node ); client .IgnoreNotFound (err ) != nil {
267- log .FromContext (ctx ).Error (err , "unable to delete node %q due to gone instance" , node . Name )
269+ log .FromContext (ctx ).WithValues ( "Node" , klog . KObj ( & node )). Error (err , "unable to delete due to gone instance" )
268270 continue
269271 }
270272 }
@@ -596,7 +598,9 @@ func (c *Client) CreateFleet(ctx context.Context, input *ec2.CreateFleetInput, _
596598 // This is meant to simulate instance startup time
597599 case <- c .clock .After (30 * time .Second ):
598600 }
599- if err := c .kubeClient .Create (launchCtx , c .toNode (ctx , instance )); err != nil {
601+ if err := retry .OnError (retry .DefaultBackoff , func (_ error ) bool { return true }, func () error {
602+ return c .kubeClient .Create (launchCtx , c .toNode (ctx , instance ))
603+ }); err != nil {
600604 c .instances .Delete (lo .FromPtr (instance .InstanceId ))
601605 c .instanceLaunchCancels .Delete (lo .FromPtr (instance .InstanceId ))
602606 }
0 commit comments