Skip to content

Commit 60b1f87

Browse files
committed
Add some metrics to check the scheduling allowance status
Signed-off-by: JmPotato <ghzpotato@gmail.com>
1 parent 9615916 commit 60b1f87

File tree

3 files changed

+125
-0
lines changed

3 files changed

+125
-0
lines changed

metrics/grafana/pd.json

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2340,6 +2340,113 @@
23402340
"alignLevel": null
23412341
}
23422342
},
2343+
{
2344+
"aliasColors": {},
2345+
"bars": true,
2346+
"dashLength": 10,
2347+
"dashes": false,
2348+
"datasource": "${DS_TEST-CLUSTER}",
2349+
"description": "The allowance status of the scheduling.",
2350+
"fieldConfig": {
2351+
"defaults": {},
2352+
"overrides": []
2353+
},
2354+
"fill": 0,
2355+
"fillGradient": 0,
2356+
"gridPos": {
2357+
"h": 8,
2358+
"w": 12,
2359+
"x": 12,
2360+
"y": 41
2361+
},
2362+
"hiddenSeries": false,
2363+
"id": 1464,
2364+
"legend": {
2365+
"alignAsTable": true,
2366+
"avg": false,
2367+
"current": true,
2368+
"hideEmpty": true,
2369+
"hideZero": true,
2370+
"max": false,
2371+
"min": false,
2372+
"rightSide": true,
2373+
"show": true,
2374+
"total": false,
2375+
"values": true
2376+
},
2377+
"lines": false,
2378+
"linewidth": 1,
2379+
"links": [],
2380+
"nullPointMode": "null",
2381+
"options": {
2382+
"alertThreshold": true
2383+
},
2384+
"paceLength": 10,
2385+
"percentage": false,
2386+
"pluginVersion": "7.5.10",
2387+
"pointradius": 1,
2388+
"points": false,
2389+
"renderer": "flot",
2390+
"seriesOverrides": [],
2391+
"spaceLength": 10,
2392+
"stack": true,
2393+
"steppedLine": false,
2394+
"targets": [
2395+
{
2396+
"exemplar": true,
2397+
"expr": "pd_scheduling_allowance_status{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=\"$instance\"}",
2398+
"format": "time_series",
2399+
"interval": "",
2400+
"intervalFactor": 2,
2401+
"legendFormat": "{{kind}}",
2402+
"metric": "pd_scheduling_allowance_status",
2403+
"refId": "A",
2404+
"step": 2
2405+
}
2406+
],
2407+
"thresholds": [],
2408+
"timeFrom": null,
2409+
"timeRegions": [],
2410+
"timeShift": null,
2411+
"title": "Scheduling Allowance Status",
2412+
"tooltip": {
2413+
"shared": true,
2414+
"sort": 1,
2415+
"value_type": "individual"
2416+
},
2417+
"type": "graph",
2418+
"xaxis": {
2419+
"buckets": null,
2420+
"mode": "time",
2421+
"name": null,
2422+
"show": true,
2423+
"values": []
2424+
},
2425+
"yaxes": [
2426+
{
2427+
"$$hashKey": "object:533",
2428+
"format": "short",
2429+
"label": null,
2430+
"logBase": 1,
2431+
"max": null,
2432+
"min": "0",
2433+
"show": true
2434+
},
2435+
{
2436+
"$$hashKey": "object:534",
2437+
"format": "short",
2438+
"label": null,
2439+
"logBase": 1,
2440+
"max": null,
2441+
"min": null,
2442+
"show": true
2443+
}
2444+
],
2445+
"yaxis": {
2446+
"align": false,
2447+
"alignLevel": null
2448+
}
2449+
},
23432450
{
23442451
"cacheTimeout": null,
23452452
"colorBackground": false,

server/cluster/cluster.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2734,15 +2734,24 @@ func (c *RaftCluster) GetPausedSchedulerDelayUntil(name string) (int64, error) {
27342734
return c.coordinator.getPausedSchedulerDelayUntil(name)
27352735
}
27362736

2737+
var (
2738+
onlineUnsafeRecoveryStatus = schedulingAllowanceStatusGauge.WithLabelValues("online-unsafe-recovery")
2739+
haltSchedulingStatus = schedulingAllowanceStatusGauge.WithLabelValues("halt-scheduling")
2740+
)
2741+
27372742
// checkSchedulingAllowance checks if the cluster allows scheduling.
27382743
func (c *RaftCluster) checkSchedulingAllowance() (bool, error) {
27392744
// If the cluster is in the process of online unsafe recovery, it should not allow scheduling.
27402745
if c.GetUnsafeRecoveryController().IsRunning() {
2746+
onlineUnsafeRecoveryStatus.Set(1)
27412747
return false, errs.ErrUnsafeRecoveryIsRunning.FastGenByArgs()
27422748
}
2749+
onlineUnsafeRecoveryStatus.Set(0)
27432750
// If the halt-scheduling is set, it should not allow scheduling.
27442751
if c.opt.IsSchedulingHalted() {
2752+
haltSchedulingStatus.Set(1)
27452753
return false, errs.ErrSchedulingIsHalted.FastGenByArgs()
27462754
}
2755+
haltSchedulingStatus.Set(0)
27472756
return true, nil
27482757
}

server/cluster/metrics.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,14 @@ var (
4141
Help: "Counter of the bucket event",
4242
}, []string{"event"})
4343

44+
schedulingAllowanceStatusGauge = prometheus.NewGaugeVec(
45+
prometheus.GaugeOpts{
46+
Namespace: "pd",
47+
Subsystem: "scheduling",
48+
Name: "allowance_status",
49+
Help: "Status of the scheduling allowance.",
50+
}, []string{"kind"})
51+
4452
schedulerStatusGauge = prometheus.NewGaugeVec(
4553
prometheus.GaugeOpts{
4654
Namespace: "pd",
@@ -140,6 +148,7 @@ var (
140148
func init() {
141149
prometheus.MustRegister(regionEventCounter)
142150
prometheus.MustRegister(healthStatusGauge)
151+
prometheus.MustRegister(schedulingAllowanceStatusGauge)
143152
prometheus.MustRegister(schedulerStatusGauge)
144153
prometheus.MustRegister(hotSpotStatusGauge)
145154
prometheus.MustRegister(patrolCheckRegionsGauge)

0 commit comments

Comments
 (0)