Skip to content

Commit 42e13ff

Browse files
committed
add metrics
Signed-off-by: yisaer <disxiaofei@163.com>
1 parent a44c9cc commit 42e13ff

File tree

3 files changed

+49
-2
lines changed

3 files changed

+49
-2
lines changed

server/schedule/filter/filters.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -686,6 +686,8 @@ const (
686686
EngineKey = "engine"
687687
// EngineTiFlash is the tiflash value of the engine label.
688688
EngineTiFlash = "tiflash"
689+
// EngineTiKV indicates the tikv engine in metrics.
690+
EngineTiKV = "tikv"
689691
)
690692

691693
var allSpecialUses = []string{SpecialUseHotRegion, SpecialUseReserved}

server/schedule/metrics.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,22 @@ var (
7373
Name: "store_limit_cost",
7474
Help: "limit rate cost of store.",
7575
}, []string{"store", "limit_type"})
76+
77+
scatterCounter = prometheus.NewCounterVec(
78+
prometheus.CounterOpts{
79+
Namespace: "pd",
80+
Subsystem: "schedule",
81+
Name: "scatter_operators_count",
82+
Help: "Counter of region scatter operators.",
83+
}, []string{"type", "event"})
84+
85+
scatterDistributionCounter = prometheus.NewCounterVec(
86+
prometheus.CounterOpts{
87+
Namespace: "pd",
88+
Subsystem: "schedule",
89+
Name: "scatter_distribution",
90+
Help: "Counter of the distribution in scatter.",
91+
}, []string{"store", "is_leader", "engine"})
7692
)
7793

7894
func init() {
@@ -83,4 +99,6 @@ func init() {
8399
prometheus.MustRegister(storeLimitRateGauge)
84100
prometheus.MustRegister(storeLimitCostCounter)
85101
prometheus.MustRegister(operatorWaitCounter)
102+
prometheus.MustRegister(scatterCounter)
103+
prometheus.MustRegister(scatterDistributionCounter)
86104
}

server/schedule/region_scatterer.go

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ const maxRetryLimit = 30
137137
func (r *RegionScatterer) ScatterRegionsByRange(startKey, endKey []byte, group string, retryLimit int) ([]*operator.Operator, map[uint64]error, error) {
138138
regions := r.cluster.ScanRegions(startKey, endKey, -1)
139139
if len(regions) < 1 {
140+
scatterCounter.WithLabelValues("skip", "empty_region").Inc()
140141
return nil, nil, errors.New("empty region")
141142
}
142143
failures := make(map[uint64]error, len(regions))
@@ -155,13 +156,16 @@ func (r *RegionScatterer) ScatterRegionsByRange(startKey, endKey []byte, group s
155156
// ScatterRegionsByID directly scatter regions by ScatterRegions
156157
func (r *RegionScatterer) ScatterRegionsByID(regionsID []uint64, group string, retryLimit int) ([]*operator.Operator, map[uint64]error, error) {
157158
if len(regionsID) < 1 {
159+
scatterCounter.WithLabelValues("skip", "empty_region").Inc()
158160
return nil, nil, errors.New("empty region")
159161
}
160162
failures := make(map[uint64]error, len(regionsID))
161163
var regions []*core.RegionInfo
162164
for _, id := range regionsID {
163165
region := r.cluster.GetRegion(id)
164166
if region == nil {
167+
scatterCounter.WithLabelValues("skip", "no_region").Inc()
168+
log.Warn("failed to find region during scatter", zap.Uint64("region-id", id))
165169
failures[id] = errors.New(fmt.Sprintf("failed to find region %v", id))
166170
continue
167171
}
@@ -187,6 +191,7 @@ func (r *RegionScatterer) ScatterRegionsByID(regionsID []uint64, group string, r
187191
// and the value of the failures indicates the failure error.
188192
func (r *RegionScatterer) ScatterRegions(regions map[uint64]*core.RegionInfo, failures map[uint64]error, group string, retryLimit int) ([]*operator.Operator, error) {
189193
if len(regions) < 1 {
194+
scatterCounter.WithLabelValues("skip", "empty_region").Inc()
190195
return nil, errors.New("empty region")
191196
}
192197
if retryLimit > maxRetryLimit {
@@ -226,14 +231,20 @@ func (r *RegionScatterer) ScatterRegions(regions map[uint64]*core.RegionInfo, fa
226231
func (r *RegionScatterer) Scatter(region *core.RegionInfo, group string) (*operator.Operator, error) {
227232
if !opt.IsRegionReplicated(r.cluster, region) {
228233
r.cluster.AddSuspectRegions(region.GetID())
234+
scatterCounter.WithLabelValues("skip", "not_replicated").Inc()
235+
log.Warn("region not replicated during scatter", zap.Uint64("region-id", region.GetID()))
229236
return nil, errors.Errorf("region %d is not fully replicated", region.GetID())
230237
}
231238

232239
if region.GetLeader() == nil {
240+
scatterCounter.WithLabelValues("skip", "no_leader").Inc()
241+
log.Warn("region no leader during scatter", zap.Uint64("region-id", region.GetID()))
233242
return nil, errors.Errorf("region %d has no leader", region.GetID())
234243
}
235244

236245
if r.cluster.IsRegionHot(region) {
246+
scatterCounter.WithLabelValues("skip", "hot").Inc()
247+
log.Warn("region too hot during scatter", zap.Uint64("region-id", region.GetID()))
237248
return nil, errors.Errorf("region %d is hot", region.GetID())
238249
}
239250

@@ -286,15 +297,19 @@ func (r *RegionScatterer) scatterRegion(region *core.RegionInfo, group string) *
286297

287298
op, err := operator.CreateScatterRegionOperator("scatter-region", r.cluster, region, targetPeers, targetLeader)
288299
if err != nil {
300+
scatterCounter.WithLabelValues("fail", "").Inc()
289301
for _, peer := range region.GetPeers() {
290302
targetPeers[peer.GetStoreId()] = peer
291303
}
292304
r.Put(targetPeers, region.GetLeader().GetStoreId(), group)
293305
log.Debug("fail to create scatter region operator", errs.ZapError(err))
294306
return nil
295307
}
296-
r.Put(targetPeers, targetLeader, group)
297-
op.SetPriorityLevel(core.HighPriority)
308+
if op != nil {
309+
scatterCounter.WithLabelValues("success", "").Inc()
310+
r.Put(targetPeers, targetLeader, group)
311+
op.SetPriorityLevel(core.HighPriority)
312+
}
298313
return op
299314
}
300315

@@ -375,10 +390,22 @@ func (r *RegionScatterer) Put(peers map[uint64]*metapb.Peer, leaderStoreID uint6
375390
store := r.cluster.GetStore(storeID)
376391
if ordinaryFilter.Target(r.cluster.GetOpts(), store) {
377392
r.ordinaryEngine.selectedPeer.Put(storeID, group)
393+
scatterDistributionCounter.WithLabelValues(
394+
fmt.Sprintf("%v", storeID),
395+
fmt.Sprintf("%v", false),
396+
filter.EngineTiKV).Inc()
378397
} else {
379398
engine := store.GetLabelValue(filter.EngineKey)
380399
r.specialEngines[engine].selectedPeer.Put(storeID, group)
400+
scatterDistributionCounter.WithLabelValues(
401+
fmt.Sprintf("%v", storeID),
402+
fmt.Sprintf("%v", false),
403+
engine).Inc()
381404
}
382405
}
383406
r.ordinaryEngine.selectedLeader.Put(leaderStoreID, group)
407+
scatterDistributionCounter.WithLabelValues(
408+
fmt.Sprintf("%v", leaderStoreID),
409+
fmt.Sprintf("%v", true),
410+
filter.EngineTiKV).Inc()
384411
}

0 commit comments

Comments
 (0)