@@ -24,6 +24,7 @@ import (
2424 "errors"
2525 "fmt"
2626 "net/http"
27+ "sort"
2728 "strings"
2829 "sync"
2930
@@ -105,6 +106,48 @@ func (p *DefaultProvider) GetClusterCNI(_ context.Context) (string, error) {
105106 return p .clusterCNI , nil
106107}
107108
109+ // Get the ID of the default nodepool. If there is no default nodepool, select the nodepool with the most HealthyNodes.
110+ func (p * DefaultProvider ) getDefaultNodePoolId (ctx context.Context ) (* string , error ) {
111+ resp , err := p .ackClient .DescribeClusterNodePools (tea .String (p .clusterID ), & ackclient.DescribeClusterNodePoolsRequest {})
112+ if err != nil {
113+ log .FromContext (ctx ).Error (err , "Failed to describe cluster nodepools" )
114+ return nil , err
115+ }
116+ if resp == nil || resp .Body == nil || resp .Body .Nodepools == nil {
117+ return nil , fmt .Errorf ("empty describe cluster nodepools response" )
118+ }
119+ if len (resp .Body .Nodepools ) == 0 {
120+ return nil , fmt .Errorf ("no nodepool found" )
121+ }
122+
123+ nodepools := resp .Body .Nodepools
124+ sort .Slice (nodepools , func (i , j int ) bool {
125+ if nodepools [i ].NodepoolInfo == nil || nodepools [j ].NodepoolInfo == nil {
126+ return false
127+ }
128+
129+ if nodepools [i ].NodepoolInfo .IsDefault != nil && nodepools [j ].NodepoolInfo .IsDefault != nil {
130+ if * nodepools [i ].NodepoolInfo .IsDefault && ! * nodepools [j ].NodepoolInfo .IsDefault {
131+ return true
132+ }
133+ if ! * nodepools [i ].NodepoolInfo .IsDefault && * nodepools [j ].NodepoolInfo .IsDefault {
134+ return false
135+ }
136+ }
137+
138+ if nodepools [i ].Status == nil || nodepools [j ].Status == nil || nodepools [i ].Status .HealthyNodes == nil || nodepools [j ].Status .HealthyNodes == nil {
139+ return false
140+ }
141+ return * nodepools [i ].Status .HealthyNodes > * nodepools [j ].Status .HealthyNodes
142+ })
143+
144+ targetNodepool := nodepools [0 ]
145+ if targetNodepool .NodepoolInfo == nil {
146+ return nil , fmt .Errorf ("target describe cluster nodepool is empty" )
147+ }
148+ return targetNodepool .NodepoolInfo .NodepoolId , nil
149+ }
150+
108151func (p * DefaultProvider ) GetNodeRegisterScript (ctx context.Context ,
109152 capacityType string ,
110153 nodeClaim * karpv1.NodeClaim ,
@@ -114,8 +157,19 @@ func (p *DefaultProvider) GetNodeRegisterScript(ctx context.Context,
114157 return p .resolveUserData (cachedScript .(string ), labels , nodeClaim , kubeletCfg ), nil
115158 }
116159
160+ nodepoolId , err := p .getDefaultNodePoolId (ctx )
161+ if err != nil {
162+ 		// Don't return here; we can still proceed when no default nodepool ID is available.
163+ // We need to try to obtain a usable nodepool ID in order to get the cluster attach scripts.
164+ // One known scenario is on an ACK cluster with version 1.24, where the user deleted the default nodepool and
165+ // created a nodepool with a containerd runtime. The DescribeClusterAttachScriptsRequest api will use the
166+ // CRI configuration of the deleted default nodepool, which might be using the Docker runtime.
167+ // This could result in nodes failing to register to the new cluster.
168+ log .FromContext (ctx ).Error (err , "Failed to get default nodepool id" )
169+ }
117170 reqPara := & ackclient.DescribeClusterAttachScriptsRequest {
118171 KeepInstanceName : tea .Bool (true ),
172+ NodepoolId : nodepoolId ,
119173 }
120174 resp , err := p .ackClient .DescribeClusterAttachScripts (tea .String (p .clusterID ), reqPara )
121175 if err != nil {
0 commit comments