Skip to content

Commit 611112c

Browse files
committed
Fix premature LeaseAcquired in LeaseActor Granting state
1 parent 2524059 commit 611112c

2 files changed

Lines changed: 80 additions & 1 deletion

File tree

src/coordination/azure/Akka.Coordination.Azure.Tests/LeaseActorSpec.cs

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,86 @@ public void LeaseAcquireInReadingState()
393393
// TODO this could accumulate senders and reply to all, atm it'll log saying previous action hasn't finished
394394
}
395395

396+
// Regression test: when a write conflict occurs in Granting state and the blob has no owner,
397+
// LeaseAcquired must NOT be sent until the retry write succeeds. Sending it prematurely causes
398+
// the shard to start without localGranted=true and without heartbeats, leading to a shard-level
399+
// split brain if another node takes the lease before the retry completes.
400+
[Fact(DisplayName = "LeaseActor should not send LeaseAcquired prematurely on conflict retry in Granting state")]
401+
public void ShouldNotSendLeaseAcquiredPrematurelyOnConflictRetry()
402+
{
403+
RunTest(() =>
404+
{
405+
// Start acquiring: read returns empty lease
406+
UnderTest.Tell(new LeaseActor.Acquire(), Sender);
407+
LeaseProbe.ExpectMsg(LeaseName);
408+
LeaseProbe.Reply(new LeaseResource("", CurrentVersion, CurrentTime));
409+
410+
// First write attempt
411+
UpdateProbe.ExpectMsg((OwnerName, CurrentVersion));
412+
413+
// Conflict: version moved but no owner (another node wrote and released)
414+
var conflictVersion = new ETag((CurrentVersionCount + 3).ToString());
415+
UpdateProbe.Reply(
416+
new Left<LeaseResource, LeaseResource>(
417+
new LeaseResource("", conflictVersion, CurrentTime)));
418+
419+
// LeaseAcquired must NOT have been sent yet — the retry hasn't completed
420+
SenderProbe.ExpectNoMsg(TimeSpan.FromMilliseconds(100));
421+
422+
// Retry write is issued with the new version
423+
UpdateProbe.ExpectMsg((OwnerName, conflictVersion));
424+
425+
// Retry also conflicts, but this time another node owns it
426+
var stolenVersion = new ETag((CurrentVersionCount + 5).ToString());
427+
UpdateProbe.Reply(
428+
new Left<LeaseResource, LeaseResource>(
429+
new LeaseResource("another-node", stolenVersion, CurrentTime)));
430+
431+
// Now the caller should get LeaseTaken (not a stale LeaseAcquired)
432+
SenderProbe.ExpectMsg<LeaseActor.LeaseTaken>();
433+
Granted.Value.Should().BeFalse();
434+
});
435+
}
436+
437+
// Verify that conflict retry with no owner eventually succeeds and properly
438+
// transitions to Granted state with heartbeats enabled
439+
[Fact(DisplayName = "LeaseActor should acquire lease after conflict retry succeeds")]
440+
public void ShouldAcquireLeaseAfterConflictRetrySucceeds()
441+
{
442+
RunTest(() =>
443+
{
444+
UnderTest.Tell(new LeaseActor.Acquire(), Sender);
445+
LeaseProbe.ExpectMsg(LeaseName);
446+
LeaseProbe.Reply(new LeaseResource("", CurrentVersion, CurrentTime));
447+
448+
// First write attempt
449+
UpdateProbe.ExpectMsg((OwnerName, CurrentVersion));
450+
451+
// Conflict: version moved but no owner
452+
var conflictVersion = new ETag((CurrentVersionCount + 3).ToString());
453+
UpdateProbe.Reply(
454+
new Left<LeaseResource, LeaseResource>(
455+
new LeaseResource("", conflictVersion, CurrentTime)));
456+
457+
// No premature LeaseAcquired
458+
SenderProbe.ExpectNoMsg(TimeSpan.FromMilliseconds(100));
459+
460+
// Retry write succeeds
461+
UpdateProbe.ExpectMsg((OwnerName, conflictVersion));
462+
var grantedVersion = new ETag((CurrentVersionCount + 6).ToString());
463+
UpdateProbe.Reply(
464+
new Right<LeaseResource, LeaseResource>(
465+
new LeaseResource(OwnerName, grantedVersion, CurrentTime)));
466+
467+
// NOW LeaseAcquired should be sent
468+
SenderProbe.ExpectMsg<LeaseActor.LeaseAcquired>();
469+
Granted.Value.Should().BeTrue();
470+
471+
// Heartbeat should be running (proves we're in Granted state)
472+
UpdateProbe.ExpectMsg((OwnerName, grantedVersion));
473+
});
474+
}
475+
396476
[Fact(DisplayName = "LeaseActor should return lease taken if conflict when updating lease")]
397477
public void ReturnLeaseTakenIfConflictWhenUpdatingLease()
398478
{

src/coordination/azure/Akka.Coordination.Azure/LeaseActor.cs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,6 @@ public LeaseActor(IAzureApi client, LeaseSettings settings, string leaseName, At
359359
throw new LeaseException(
360360
$"Update response from Azure Blob should not return the same version: Response: {leftResponse}. Client: {data}");
361361
// Try again as lock version has moved on but is not taken
362-
who.Tell(LeaseAcquired.Instance);
363362
_client.UpdateLeaseResource(leaseName, _ownerName, version)
364363
.ContinueWith(t => new WriteResponse(t.Result))
365364
.PipeTo(Self, failure: FlattenAggregateException);

0 commit comments

Comments
 (0)