Skip to content

Commit c4a1c52

Browse files
committed
fix review comments
Signed-off-by: ivan-atme <ivan.laishevskiy@atme.com>
1 parent d3d8b9b commit c4a1c52

File tree

3 files changed

+124
-34
lines changed

3 files changed

+124
-34
lines changed

internal/bft/controller.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -632,14 +632,14 @@ func (c *Controller) sync() (viewNum uint64, seq uint64, decisions uint64) {
632632
controllerViewNum := c.currViewNumber
633633
newViewNum = controllerViewNum
634634

635+
newDecisionsInView = c.getCurrentDecisionsInView()
636+
635637
if latestDecisionSeq > controllerSequence {
636638
c.Logger.Infof("Synchronizer returned with sequence %d while the controller is at sequence %d", latestDecisionSeq, controllerSequence)
637639
c.Logger.Debugf("Node %d is setting the checkpoint after sync returned with view %d and seq %d", c.ID, latestDecisionViewNum, latestDecisionSeq)
638640
c.Checkpoint.Set(latestDecision.Proposal, latestDecision.Signatures)
639641
c.verificationSequence.Store(uint64(latestDecision.Proposal.VerificationSequence))
640642
newProposalSequence = latestDecisionSeq + 1
641-
}
642-
if latestDecisionMetadata != nil {
643643
newDecisionsInView = latestDecisionDecisions + 1
644644
}
645645

internal/bft/controller_sync_test.go

Lines changed: 23 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -30,20 +30,13 @@ func (s *stubComm) SendTransaction(targetID uint64, request []byte) {}
3030
func (s *stubComm) Nodes() []uint64 { return []uint64{1, 2, 3, 4} }
3131
func (s *stubComm) BroadcastConsensus(m *protos.Message) {}
3232

33-
// TestSyncDecisionsInView tests that the sync() method correctly preserves
34-
// the DecisionsInView counter. This test reproduces a bug where sync()
35-
// resets DecisionsInView to 0 when the synchronizer returns the same block
36-
// height as the controller (the "already at target height" case).
37-
//
38-
// Bug location: controller.go, sync() function.
39-
// Line 610: newDecisionsInView is zero-initialized.
40-
// Line 635: only updated when latestDecisionSeq > controllerSequence.
41-
// When latestDecisionSeq == controllerSequence, newDecisionsInView stays 0.
42-
//
43-
// Real-world impact: after sync returns same height, changeView() resets
44-
// DecisionsInView from ~9578 to 0. The next proposal carries the correct
45-
// DecisionsInView=9578, which fails validation at view.go:577, causing the
46-
// orderer to reject valid proposals and enter a recovery sync loop.
33+
// TestSyncDecisionsInView tests that the sync() method returns the correct
34+
// DecisionsInView value in two scenarios:
35+
// - same height: sync returns the same sequence as the controller, so
36+
// newDecisionsInView must come from the controller's in-memory state
37+
// (getCurrentDecisionsInView), not remain zero-initialized.
38+
// - higher height: sync returns a higher sequence, so newDecisionsInView
39+
// is overridden from the sync response metadata (+1).
4740
func TestSyncDecisionsInView(t *testing.T) {
4841
const (
4942
controllerSeq = uint64(2365908)
@@ -83,16 +76,17 @@ func TestSyncDecisionsInView(t *testing.T) {
8376
t.Cleanup(collector.Stop)
8477

8578
c := &Controller{
86-
ID: 1,
87-
N: 4,
88-
Logger: log,
89-
Comm: &stubComm{},
90-
Synchronizer: &stubSynchronizer{response: syncResponse},
91-
Checkpoint: checkpoint,
92-
Collector: collector,
93-
InFlight: &InFlightData{},
94-
ViewChanger: &ViewChanger{},
95-
currViewNumber: controllerView,
79+
ID: 1,
80+
N: 4,
81+
Logger: log,
82+
Comm: &stubComm{},
83+
Synchronizer: &stubSynchronizer{response: syncResponse},
84+
Checkpoint: checkpoint,
85+
Collector: collector,
86+
InFlight: &InFlightData{},
87+
ViewChanger: &ViewChanger{},
88+
currViewNumber: controllerView,
89+
currDecisionsInView: controllerDecisions + 1, // checkpoint stores N, decide() increments to N+1
9690
}
9791
// sync() uses grabSyncToken/relinquishSyncToken which need syncChan.
9892
c.syncChan = make(chan struct{}, 1)
@@ -122,18 +116,15 @@ func TestSyncDecisionsInView(t *testing.T) {
122116

123117
assert.Equal(t, controllerView, viewNum, "view number should be preserved")
124118
assert.Equal(t, controllerSeq+1, seq, "proposal sequence should be controllerSeq+1")
125-
// BUG: sync() returns decisions=0 because the condition at line 635
126-
// (latestDecisionSeq > controllerSequence) is false when they're equal,
127-
// so newDecisionsInView is never set from the zero-initialized value.
128-
// The correct value should be controllerDecisions+1 (9579).
119+
// newDecisionsInView should be initialized from the controller's in-memory
120+
// state (getCurrentDecisionsInView), which is controllerDecisions+1.
129121
assert.Equal(t, controllerDecisions+1, decisions,
130-
"DecisionsInView must be preserved when sync returns same height; "+
131-
"got 0 means the bug at controller.go:635 is still present")
122+
"DecisionsInView must be preserved when sync returns same height")
132123
})
133124

134-
t.Run("higher_height_preserves_decisions", func(t *testing.T) {
125+
t.Run("higher_height_overrides_decisions", func(t *testing.T) {
135126
// Synchronizer returns a higher sequence than the controller.
136-
// This is the working code path (line 641).
127+
// In this case newDecisionsInView is overridden from the sync response (+1).
137128
syncDecisions := controllerDecisions + 1
138129
c := newController(t, types.SyncResponse{
139130
Latest: types.Decision{

test/basic_test.go

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1049,6 +1049,105 @@ func TestCatchingUpWithSyncAutonomous(t *testing.T) {
10491049
assert.Equal(t, uint32(0), atomic.LoadUint32(&detectedSequenceGap))
10501050
}
10511051

1052+
func TestSyncSameHeightPreservesDecisionsInView(t *testing.T) {
1053+
// Scenario:
1054+
// we inject a fake heartbeat with a higher view number to node 4 (index 3).
1055+
// The HeartbeatMonitor sees hb.View > hm.view and calls handler.Sync()
1056+
// directly — no view change involved. Since node 4 already has all blocks,
1057+
// sync() returns the same height.
1058+
t.Parallel()
1059+
network := NewNetwork()
1060+
defer network.Shutdown()
1061+
1062+
testDir, err := os.MkdirTemp("", t.Name())
1063+
assert.NoErrorf(t, err, "generate temporary test dir")
1064+
defer os.RemoveAll(testDir)
1065+
1066+
numberOfNodes := 4
1067+
nodes := make([]*App, 0)
1068+
for i := 1; i <= numberOfNodes; i++ {
1069+
n := newNode(uint64(i), network, t.Name(), testDir, false, 0)
1070+
nodes = append(nodes, n)
1071+
}
1072+
1073+
// Hook node 4's logger to detect:
1074+
// 1. When sync is processed (so we know it's safe to submit more proposals)
1075+
// 2. The bug symptom: "Expected decisions in view" validation failure
1076+
var syncProcessed uint32
1077+
bugDetected := make(chan struct{}, 1)
1078+
baseLogger := nodes[3].Consensus.Logger.(*zap.SugaredLogger).Desugar()
1079+
nodes[3].Consensus.Logger = baseLogger.WithOptions(zap.Hooks(func(entry zapcore.Entry) error {
1080+
if strings.Contains(entry.Message, "get msg from syncChan") {
1081+
atomic.StoreUint32(&syncProcessed, 1)
1082+
}
1083+
if strings.Contains(entry.Message, "Expected decisions in view") {
1084+
select {
1085+
case bugDetected <- struct{}{}:
1086+
default:
1087+
}
1088+
}
1089+
return nil
1090+
})).Sugar()
1091+
1092+
startNodes(nodes, network)
1093+
1094+
// Phase 1: Submit 5 proposals to build up DecisionsInView on all nodes.
1095+
for i := 1; i <= 5; i++ {
1096+
nodes[0].Submit(Request{ID: fmt.Sprintf("%d", i), ClientID: "alice"})
1097+
for j := 0; j < numberOfNodes; j++ {
1098+
<-nodes[j].Delivered
1099+
}
1100+
}
1101+
1102+
// Phase 2:
1103+
// the bug this test validate depended on non-deterministic Go select ordering
1104+
// in the [internal.bft.Controller] run() function:
1105+
// select {
1106+
// case newView := <-c.viewChange:
1107+
// case <-c.syncChan:
1108+
// }
1109+
// That's why it can't be reliably reproduced through the natural path
1110+
// in tests — hence the fake heartbeat that triggers only sync, guaranteeing
1111+
// the bug path.
1112+
//
1113+
// The test injects a fake heartbeat with View=1 (higher than current view 0)
1114+
// to node 4. The HeartbeatMonitor sees hb.View > hm.view and calls
1115+
// handler.Sync() directly, bypassing any view change.
1116+
// The sender must be the leader (ID 1) to pass the leaderID check.
1117+
fakeHeartbeat := &smartbftprotos.Message{
1118+
Content: &smartbftprotos.Message_HeartBeat{
1119+
HeartBeat: &smartbftprotos.HeartBeat{
1120+
View: 1, // higher than node 4's current view (0)
1121+
Seq: 6,
1122+
},
1123+
},
1124+
}
1125+
nodes[3].Consensus.HandleMessage(1, fakeHeartbeat)
1126+
1127+
// Wait for the sync to be processed by the controller's run loop.
1128+
assert.Eventually(t, func() bool {
1129+
return atomic.LoadUint32(&syncProcessed) == 1
1130+
}, 30*time.Second, 50*time.Millisecond,
1131+
"Node 4 should process sync triggered by fake heartbeat")
1132+
1133+
// Phase 3: Submit more proposals. Without the bug fix, node 4's view was
1134+
// restarted with DecisionsInView=0, so it rejects proposals from the
1135+
// leader (which has DecisionsInView=5).
1136+
for i := 6; i <= 10; i++ {
1137+
nodes[0].Submit(Request{ID: fmt.Sprintf("%d", i), ClientID: "alice"})
1138+
for j := 0; j < numberOfNodes; j++ {
1139+
select {
1140+
case <-nodes[j].Delivered:
1141+
case <-bugDetected:
1142+
t.Fatalf("DecisionsInView validation failed on node: sync at same height reset DecisionsInView to 0")
1143+
case <-time.After(30 * time.Second):
1144+
t.Fatalf("Node %d did not deliver proposal %d within timeout", j+1, i)
1145+
}
1146+
}
1147+
}
1148+
1149+
}
1150+
10521151
func TestFollowerStateTransfer(t *testing.T) {
10531152
// Scenario: the leader (n0) is disconnected and so there is a view change
10541153
// a follower (n6) is also disconnected and misses the view change

0 commit comments

Comments
 (0)