
Commit 5a77718

ti-chi-bot and HunDunDM authored
region_scatterer: fix the bug that could generate schedule with too few peers (#4570) (#4580)
close #4565, ref #4570

Signed-off-by: HunDunDM <hundundm@gmail.com>
Co-authored-by: HunDunDM <hundundm@gmail.com>
Co-authored-by: 混沌DM <hundundm@gmail.com>
1 parent a68b208 commit 5a77718

2 files changed (+76, -12 lines)

server/schedule/region_scatterer.go (22 additions, 10 deletions)
@@ -273,7 +273,7 @@ func (r *RegionScatterer) Scatter(region *core.RegionInfo, group string) (*opera
 
 func (r *RegionScatterer) scatterRegion(region *core.RegionInfo, group string) *operator.Operator {
 	ordinaryFilter := filter.NewOrdinaryEngineFilter(r.name)
-	ordinaryPeers := make(map[uint64]*metapb.Peer)
+	ordinaryPeers := make(map[uint64]*metapb.Peer, len(region.GetPeers()))
 	specialPeers := make(map[string]map[uint64]*metapb.Peer)
 	// Group peers by the engine of their stores
 	for _, peer := range region.GetPeers() {
@@ -282,24 +282,36 @@ func (r *RegionScatterer) scatterRegion(region *core.RegionInfo, group string) *
 			return nil
 		}
 		if ordinaryFilter.Target(r.cluster.GetOpts(), store) {
-			ordinaryPeers[peer.GetId()] = peer
+			ordinaryPeers[peer.GetStoreId()] = peer
 		} else {
 			engine := store.GetLabelValue(core.EngineKey)
 			if _, ok := specialPeers[engine]; !ok {
 				specialPeers[engine] = make(map[uint64]*metapb.Peer)
 			}
-			specialPeers[engine][peer.GetId()] = peer
+			specialPeers[engine][peer.GetStoreId()] = peer
 		}
 	}
 
-	targetPeers := make(map[uint64]*metapb.Peer)
-	selectedStores := make(map[uint64]struct{})
-	scatterWithSameEngine := func(peers map[uint64]*metapb.Peer, context engineContext) {
+	targetPeers := make(map[uint64]*metapb.Peer, len(region.GetPeers())) // StoreID -> Peer
+	selectedStores := make(map[uint64]struct{}, len(region.GetPeers()))  // StoreID set
+	scatterWithSameEngine := func(peers map[uint64]*metapb.Peer, context engineContext) { // peers: StoreID -> Peer
 		for _, peer := range peers {
-			candidates := r.selectCandidates(region, peer.GetStoreId(), selectedStores, context)
-			newPeer := r.selectStore(group, peer, peer.GetStoreId(), candidates, context)
-			targetPeers[newPeer.GetStoreId()] = newPeer
-			selectedStores[newPeer.GetStoreId()] = struct{}{}
+			if _, ok := selectedStores[peer.GetStoreId()]; ok {
+				// It is both sourcePeer and targetPeer itself, no need to select.
+				continue
+			}
+			for {
+				candidates := r.selectCandidates(region, peer.GetStoreId(), selectedStores, context)
+				newPeer := r.selectStore(group, peer, peer.GetStoreId(), candidates, context)
+				targetPeers[newPeer.GetStoreId()] = newPeer
+				selectedStores[newPeer.GetStoreId()] = struct{}{}
+				// If the selected peer is a peer other than the origin peer in this region,
+				// it is considered that the selected peer selects itself.
+				// This origin peer then re-selects.
+				if _, ok := peers[newPeer.GetStoreId()]; !ok || peer.GetStoreId() == newPeer.GetStoreId() {
+					break
+				}
+			}
 		}
 	}

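To make the effect of the retry loop concrete, here is a minimal, self-contained sketch of the rule the new scatterWithSameEngine applies. It is not the PD implementation: plain uint64 store IDs stand in for *metapb.Peer, and the pick helper with its fixed candidate order stands in for selectCandidates/selectStore. The point it illustrates is that target stores are keyed by store ID, and an origin peer keeps re-selecting while its chosen store is one of the region's other original stores, so the number of distinct target stores never drops below the number of peers being scattered.

// Minimal sketch only: the store IDs, the candidate order, and the pick helper
// are invented for illustration. Not the PD code.
package main

import "fmt"

// pick returns the first store from a fixed candidate list that has not been
// selected yet, falling back to the origin store. PD's real selector weighs
// load, labels, and per-group history instead.
func pick(origin uint64, selected map[uint64]struct{}) uint64 {
	for _, candidate := range []uint64{2, 3, 4, 1} {
		if _, used := selected[candidate]; !used {
			return candidate
		}
	}
	return origin
}

func main() {
	originStores := map[uint64]struct{}{1: {}, 2: {}, 3: {}} // stores currently holding the region's peers
	targetStores := make(map[uint64]struct{})                // mirrors targetPeers, keyed by store ID
	selected := make(map[uint64]struct{})                    // mirrors selectedStores

	for origin := range originStores {
		if _, ok := selected[origin]; ok {
			// This store was already chosen as a target: the peer on it stays put.
			continue
		}
		for {
			target := pick(origin, selected)
			targetStores[target] = struct{}{}
			selected[target] = struct{}{}
			// If we landed on another origin store, that peer is treated as
			// having selected itself, and we re-select for the current peer.
			if _, isOrigin := originStores[target]; !isOrigin || target == origin {
				break
			}
		}
	}

	// Every origin peer ends up with a distinct target store, so the schedule
	// never carries fewer peers than the region has.
	fmt.Println(len(targetStores) == len(originStores)) // true
}

Because targetPeers is keyed by store ID, two origin peers that resolve to the same store would collapse into a single entry; the old code allowed that collapse, which is how operators with too few peers could be produced, and the early continue plus the retry loop above prevent it.
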
server/schedule/region_scatterer_test.go (54 additions, 2 deletions)
@@ -9,6 +9,7 @@ import (
 
 	. "github.com/pingcap/check"
 	"github.com/pingcap/failpoint"
+	"github.com/pingcap/kvproto/pkg/metapb"
 	"github.com/tikv/pd/pkg/mock/mockcluster"
 	"github.com/tikv/pd/server/config"
 	"github.com/tikv/pd/server/core"
@@ -355,8 +356,8 @@ func (s *testScatterRegionSuite) TestScatterGroupInConcurrency(c *C) {
 		}
 		// For leader, we expect each store have about 20 leader for each group
 		checker(scatterer.ordinaryEngine.selectedLeader, 20, 5)
-		// For peer, we expect each store have about 50 peers for each group
-		checker(scatterer.ordinaryEngine.selectedPeer, 50, 15)
+		// For peer, we expect each store have about 60 peers for each group
+		checker(scatterer.ordinaryEngine.selectedPeer, 60, 15)
 	}
 }
@@ -476,3 +477,54 @@ func (s *testScatterRegionSuite) TestRegionFromDifferentGroups(c *C) {
 	}
 	check(scatterer.ordinaryEngine.selectedPeer)
 }
+
+// TestSelectedStores tests if the peer count has changed due to the picking strategy.
+// Ref https://github.com/tikv/pd/issues/4565
+func (s *testScatterRegionSuite) TestSelectedStores(c *C) {
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+	opt := config.NewTestOptions()
+	tc := mockcluster.NewCluster(ctx, opt)
+	// Add 4 stores.
+	for i := uint64(1); i <= 4; i++ {
+		tc.AddRegionStore(i, 0)
+		// prevent store from being disconnected
+		tc.SetStoreLastHeartbeatInterval(i, -10*time.Minute)
+	}
+	group := "group"
+	scatterer := NewRegionScatterer(ctx, tc)
+
+	// Put a lot of regions in Store 1/2/3.
+	for i := uint64(1); i < 100; i++ {
+		region := tc.AddLeaderRegion(i+10, i%3+1, (i+1)%3+1, (i+2)%3+1)
+		peers := make(map[uint64]*metapb.Peer, 3)
+		for _, peer := range region.GetPeers() {
+			peers[peer.GetStoreId()] = peer
+		}
+		scatterer.Put(peers, i%3+1, group)
+	}
+
+	// Try to scatter a region with peer store id 2/3/4
+	for i := uint64(1); i < 20; i++ {
+		region := tc.AddLeaderRegion(i+200, i%3+2, (i+1)%3+2, (i+2)%3+2)
+		op := scatterer.scatterRegion(region, group)
+		c.Assert(isPeerCountChanged(op), IsFalse)
+	}
+}
+
+func isPeerCountChanged(op *operator.Operator) bool {
+	if op == nil {
+		return false
+	}
+	add, remove := 0, 0
+	for i := 0; i < op.Len(); i++ {
+		step := op.Step(i)
+		switch step.(type) {
+		case operator.AddPeer, operator.AddLearner, operator.AddLightPeer, operator.AddLightLearner:
+			add++
+		case operator.RemovePeer:
+			remove++
+		}
+	}
+	return add != remove
+}
