Skip to content

Commit 90a834f

Browse files
authored
fix: improve instance profile propagation handling (#639)
1 parent d19ebb1 commit 90a834f

2 files changed

Lines changed: 63 additions & 10 deletions

File tree

common/step_iam_instance_profile.go

Lines changed: 63 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -8,12 +8,17 @@ import (
88
"encoding/json"
99
"fmt"
1010
"log"
11+
"strings"
1112
"time"
1213

1314
"github.com/aws/aws-sdk-go-v2/aws"
15+
"github.com/aws/aws-sdk-go-v2/service/ec2"
16+
ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
1417
"github.com/aws/aws-sdk-go-v2/service/iam"
18+
"github.com/hashicorp/packer-plugin-amazon/common/clients"
1519
"github.com/hashicorp/packer-plugin-sdk/multistep"
1620
packersdk "github.com/hashicorp/packer-plugin-sdk/packer"
21+
"github.com/hashicorp/packer-plugin-sdk/retry"
1722
"github.com/hashicorp/packer-plugin-sdk/template/interpolate"
1823
"github.com/hashicorp/packer-plugin-sdk/uuid"
1924
)
@@ -169,12 +174,66 @@ func (s *StepIamInstanceProfile) Run(ctx context.Context, state multistep.StateB
169174
return multistep.ActionHalt
170175
}
171176

172-
s.roleIsAttached = true
173-
// Sleep to allow IAM changes to propagate
174177
// In aws sdk go v2, we noticed if there was no Wait, the spot fleet requests were failing even with retry.
175-
// Adding a delay here to allow IAM changes to propagate
178+
// Running a dummy instance in DryRun mode to validate that the instance profile is visible to EC2
179+
180+
ui.Say("Waiting for the change to propagate because of eventual consistency...")
181+
182+
ec2Client := state.Get("ec2v2").(clients.Ec2Client)
183+
sourceImageRaw, exists := state.GetOk("source_image")
184+
if !exists {
185+
err := fmt.Errorf("source_image not available in state for IAM validation")
186+
state.Put("error", err)
187+
return multistep.ActionHalt
188+
}
189+
sourceImage := sourceImageRaw.(*ec2types.Image)
190+
191+
err = retry.Config{
192+
Tries: 11,
193+
ShouldRetry: func(err error) bool {
194+
errStr := err.Error()
195+
return strings.Contains(errStr, "Invalid IAM Instance Profile")
196+
},
197+
RetryDelay: (&retry.Backoff{
198+
InitialBackoff: 500 * time.Millisecond,
199+
MaxBackoff: 5 * time.Second,
200+
Multiplier: 2,
201+
}).Linear,
202+
}.Run(ctx, func(ctx context.Context) error {
203+
204+
_, err := ec2Client.RunInstances(ctx, &ec2.RunInstancesInput{
205+
ImageId: sourceImage.ImageId,
206+
MinCount: aws.Int32(1),
207+
MaxCount: aws.Int32(1),
208+
InstanceType: ec2types.InstanceTypeT3Nano,
209+
DryRun: aws.Bool(true),
210+
IamInstanceProfile: &ec2types.IamInstanceProfileSpecification{
211+
Name: aws.String(s.createdInstanceProfileName),
212+
},
213+
})
176214

177-
time.Sleep(5 * time.Second)
215+
// For dry run, we expect a DryRunOperation error if the call would succeed
216+
// Any other error indicates the instance profile isn't visible to EC2 yet
217+
if err != nil {
218+
errStr := err.Error()
219+
if strings.Contains(errStr, "DryRunOperation") {
220+
log.Printf("[DEBUG] EC2 can see IAM instance profile %s", s.createdInstanceProfileName)
221+
return nil
222+
}
223+
log.Printf("[DEBUG] EC2 dry run failed: %s", errStr)
224+
return err
225+
}
226+
return nil
227+
})
228+
229+
if err != nil {
230+
err := fmt.Errorf("timed out waiting for IAM changes to propagate to EC2: %s", err)
231+
log.Printf("[DEBUG] %s", err.Error())
232+
state.Put("error", err)
233+
return multistep.ActionHalt
234+
}
235+
236+
s.roleIsAttached = true
178237
state.Put("iamInstanceProfile", aws.ToString(profileResp.InstanceProfile.InstanceProfileName))
179238
}
180239

common/step_run_spot_instance.go

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -420,12 +420,6 @@ func (s *StepRunSpotInstance) Run(ctx context.Context, state multistep.StateBag)
420420
err = retry.Config{
421421
Tries: 11,
422422
ShouldRetry: func(err error) bool {
423-
if strings.Contains(err.Error(), "Invalid IAM Instance Profile name") {
424-
// eventual consistency of the profile. PutRolePolicy &
425-
// AddRoleToInstanceProfile are eventually consistent and once
426-
// we can wait on those operations, this can be removed.
427-
return true
428-
}
429423
if err.Error() == "InsufficientInstanceCapacity" {
430424
return true
431425
}

0 commit comments

Comments
 (0)