Skip to content

Commit 2949a53

Browse files
ti-chi-botnolouchti-chi-bot[bot]
authored
checker: fix too many orphan peers that cannot be removed (#6574) (#6576)
close #6573, ref #6574 rule-checker: fix the case where too many orphan peers cannot be removed - allow a healthy orphan peer to be removed once redundant orphan peers exist Signed-off-by: nolouch <nolouch@gmail.com> Co-authored-by: nolouch <nolouch@gmail.com> Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com>
1 parent 0f65c6c commit 2949a53

File tree

2 files changed

+48
-6
lines changed

2 files changed

+48
-6
lines changed

pkg/schedule/checker/rule_checker.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -455,11 +455,18 @@ loopFits:
455455
// If hasUnhealthyFit is true, try to remove unhealthy orphan peers only if number of OrphanPeers is >= 2.
456456
// Ref https://github.com/tikv/pd/issues/4045
457457
if len(fit.OrphanPeers) >= 2 {
458+
hasHealthPeer := false
458459
for _, orphanPeer := range fit.OrphanPeers {
459460
if isUnhealthyPeer(orphanPeer.GetId()) {
460461
ruleCheckerRemoveOrphanPeerCounter.Inc()
461462
return operator.CreateRemovePeerOperator("remove-orphan-peer", c.cluster, 0, region, orphanPeer.StoreId)
462463
}
464+
if hasHealthPeer {
465+
// A healthy orphan peer has already been seen, so this additional healthy orphan peer can be removed.
466+
ruleCheckerRemoveOrphanPeerCounter.Inc()
467+
return operator.CreateRemovePeerOperator("remove-orphan-peer", c.cluster, 0, region, orphanPeer.StoreId)
468+
}
469+
hasHealthPeer = true
463470
}
464471
}
465472
ruleCheckerSkipRemoveOrphanPeerCounter.Inc()

pkg/schedule/checker/rule_checker_test.go

Lines changed: 41 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,39 @@ func (suite *ruleCheckerTestSuite) TestFixOrphanPeers() {
168168
suite.Equal(uint64(4), op.Step(0).(operator.RemovePeer).FromStore)
169169
}
170170

171+
func (suite *ruleCheckerTestSuite) TestFixToManyOrphanPeers() {
172+
suite.cluster.AddLeaderStore(1, 1)
173+
suite.cluster.AddLeaderStore(2, 1)
174+
suite.cluster.AddLeaderStore(3, 1)
175+
suite.cluster.AddLeaderStore(4, 1)
176+
suite.cluster.AddLeaderStore(5, 1)
177+
suite.cluster.AddLeaderStore(6, 1)
178+
suite.cluster.AddRegionWithLearner(1, 1, []uint64{2, 3}, []uint64{4, 5, 6})
179+
// Case1:
180+
// Stores 4, 5, 6 hold orphan peers, and the peer on store 3 is both pending and down.
181+
region := suite.cluster.GetRegion(1)
182+
region = region.Clone(
183+
core.WithDownPeers([]*pdpb.PeerStats{{Peer: region.GetStorePeer(3), DownSeconds: 60000}}),
184+
core.WithPendingPeers([]*metapb.Peer{region.GetStorePeer(3)}))
185+
suite.cluster.PutRegion(region)
186+
op := suite.rc.Check(suite.cluster.GetRegion(1))
187+
suite.NotNil(op)
188+
suite.Equal("remove-orphan-peer", op.Desc())
189+
suite.Equal(uint64(5), op.Step(0).(operator.RemovePeer).FromStore)
190+
191+
// Case2:
192+
// Stores 4, 5, 6 hold orphan peers, the peer on store 3 is down, and the peers on stores 4 and 5 are pending.
193+
region = suite.cluster.GetRegion(1)
194+
region = region.Clone(
195+
core.WithDownPeers([]*pdpb.PeerStats{{Peer: region.GetStorePeer(3), DownSeconds: 60000}}),
196+
core.WithPendingPeers([]*metapb.Peer{region.GetStorePeer(4), region.GetStorePeer(5)}))
197+
suite.cluster.PutRegion(region)
198+
op = suite.rc.Check(suite.cluster.GetRegion(1))
199+
suite.NotNil(op)
200+
suite.Equal("remove-orphan-peer", op.Desc())
201+
suite.Equal(uint64(4), op.Step(0).(operator.RemovePeer).FromStore)
202+
}
203+
171204
func (suite *ruleCheckerTestSuite) TestFixOrphanPeers2() {
172205
// check orphan peers can only be handled when all rules are satisfied.
173206
suite.cluster.AddLabelsStore(1, 1, map[string]string{"foo": "bar"})
@@ -312,7 +345,7 @@ func (suite *ruleCheckerTestSuite) TestFixRuleWitness() {
312345
suite.cluster.AddLabelsStore(1, 1, map[string]string{"A": "leader"})
313346
suite.cluster.AddLabelsStore(2, 1, map[string]string{"B": "follower"})
314347
suite.cluster.AddLabelsStore(3, 1, map[string]string{"C": "voter"})
315-
suite.cluster.AddLeaderRegion(1, 1, 2)
348+
suite.cluster.AddLeaderRegion(1, 1)
316349

317350
suite.ruleManager.SetRule(&placement.Rule{
318351
GroupID: "pd",
@@ -329,6 +362,7 @@ func (suite *ruleCheckerTestSuite) TestFixRuleWitness() {
329362
op := suite.rc.Check(suite.cluster.GetRegion(1))
330363
suite.NotNil(op)
331364
suite.Equal("add-rule-peer", op.Desc())
365+
fmt.Println(op)
332366
suite.Equal(uint64(3), op.Step(0).(operator.AddLearner).ToStore)
333367
suite.True(op.Step(0).(operator.AddLearner).IsWitness)
334368
}
@@ -337,24 +371,25 @@ func (suite *ruleCheckerTestSuite) TestFixRuleWitness2() {
337371
suite.cluster.AddLabelsStore(1, 1, map[string]string{"A": "leader"})
338372
suite.cluster.AddLabelsStore(2, 1, map[string]string{"B": "voter"})
339373
suite.cluster.AddLabelsStore(3, 1, map[string]string{"C": "voter"})
340-
suite.cluster.AddLeaderRegion(1, 1, 2, 3)
374+
suite.cluster.AddLabelsStore(4, 1, map[string]string{"D": "voter"})
375+
suite.cluster.AddLeaderRegion(1, 1, 2, 3, 4)
341376

342377
suite.ruleManager.SetRule(&placement.Rule{
343378
GroupID: "pd",
344379
ID: "r1",
345380
Index: 100,
346-
Override: true,
381+
Override: false,
347382
Role: placement.Voter,
348383
Count: 1,
349384
IsWitness: true,
350385
LabelConstraints: []placement.LabelConstraint{
351-
{Key: "C", Op: "in", Values: []string{"voter"}},
386+
{Key: "D", Op: "in", Values: []string{"voter"}},
352387
},
353388
})
354389
op := suite.rc.Check(suite.cluster.GetRegion(1))
355390
suite.NotNil(op)
356391
suite.Equal("fix-witness-peer", op.Desc())
357-
suite.Equal(uint64(3), op.Step(0).(operator.BecomeWitness).StoreID)
392+
suite.Equal(uint64(4), op.Step(0).(operator.BecomeWitness).StoreID)
358393
}
359394

360395
func (suite *ruleCheckerTestSuite) TestFixRuleWitness3() {
@@ -366,7 +401,7 @@ func (suite *ruleCheckerTestSuite) TestFixRuleWitness3() {
366401
r := suite.cluster.GetRegion(1)
367402
// set peer3 to witness
368403
r = r.Clone(core.WithWitnesses([]*metapb.Peer{r.GetPeer(3)}))
369-
404+
suite.cluster.PutRegion(r)
370405
op := suite.rc.Check(r)
371406
suite.NotNil(op)
372407
suite.Equal("fix-non-witness-peer", op.Desc())

0 commit comments

Comments
 (0)