Skip to content

Commit e2c8bf5

Browse files
authored
gc: Add keyspace name to logs about GC (#9875)
close #8978 As keyspace ID is hidden in metrics and hidden to users, printing keyspace ID in logs might not help in diagnosing. This PR adds keyspace name field to the logs of the GC state manager, while still keeping the keyspace ID in the logs. Signed-off-by: MyonKeminta <MyonKeminta@users.noreply.github.com>
1 parent 15e74e4 commit e2c8bf5

File tree

5 files changed

+105
-49
lines changed

5 files changed

+105
-49
lines changed

errors.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -423,7 +423,7 @@ trying to set a GC barrier on ts %d which is already behind the txn safe point %
423423

424424
["PD:gc:ErrGCOnInvalidKeyspace"]
425425
error = '''
426-
trying to manage GC in keyspace %v where keyspace level GC is not enabled
426+
trying to manage GC in keyspace %v (id: %v) where keyspace level GC is not enabled
427427
'''
428428

429429
["PD:gc:ErrGCSafePointExceedsTxnSafePoint"]

pkg/errs/errno.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -540,7 +540,7 @@ var (
540540

541541
// GC errors
542542
var (
543-
ErrGCOnInvalidKeyspace = errors.Normalize("trying to manage GC in keyspace %v where keyspace level GC is not enabled", errors.RFCCodeText("PD:gc:ErrGCOnInvalidKeyspace"))
543+
ErrGCOnInvalidKeyspace = errors.Normalize("trying to manage GC in keyspace %v (id: %v) where keyspace level GC is not enabled", errors.RFCCodeText("PD:gc:ErrGCOnInvalidKeyspace"))
544544
ErrDecreasingGCSafePoint = errors.Normalize("trying to update GC safe point to a smaller value, current value: %v, given: %v", errors.RFCCodeText("PD:gc:ErrDecreasingGCSafePoint"))
545545
ErrGCSafePointExceedsTxnSafePoint = errors.Normalize("trying to update GC safe point to a too large value that exceeds the txn safe point, current value: %v, given: %v, current txn safe point: %v", errors.RFCCodeText("PD:gc:ErrGCSafePointExceedsTxnSafePoint"))
546546
ErrDecreasingTxnSafePoint = errors.Normalize("trying to update txn safe point to a smaller value, current value: %v, given: %v", errors.RFCCodeText("PD:gc:ErrDecreasingTxnSafePoint"))

pkg/gc/gc_state_manager.go

Lines changed: 78 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -131,34 +131,56 @@ func NewGCStateManager(store endpoint.GCStateProvider, cfg config.PDServerConfig
131131
}
132132
}
133133

134+
type keyspaceNameKeyType struct{}
135+
136+
var keyspaceNameKey = keyspaceNameKeyType{}
137+
138+
func addKeyspaceNameToCtx(ctx context.Context, keyspaceName string) context.Context {
139+
return context.WithValue(ctx, keyspaceNameKey, keyspaceName)
140+
}
141+
142+
func getKeyspaceNameFromCtx(ctx context.Context) string {
143+
if ctx == nil {
144+
return "<unknown>"
145+
}
146+
if value, ok := ctx.Value(keyspaceNameKey).(string); ok {
147+
return value
148+
}
149+
return "<unknown>"
150+
}
151+
134152
// redirectKeyspace checks the given keyspaceID, and returns the actual keyspaceID to operate on.
135-
func (m *GCStateManager) redirectKeyspace(keyspaceID uint32, isUserAPI bool) (uint32, error) {
153+
//
154+
// This function also returns the target keyspace name for diagnostic purpose. But note that it returns a user-friendly
155+
// string only for diagnostic purposes (it returns "<null_keyspace>" for NullKeyspaceID). DO NOT use it as the key for
156+
// identifying a keyspace.
157+
func (m *GCStateManager) redirectKeyspace(keyspaceID uint32, isUserAPI bool) (uint32, string, error) {
136158
// Regard it as NullKeyspaceID if the given one is invalid (exceeds the valid range of keyspace id), no matter
137159
// whether it exactly matches the NullKeyspaceID.
138160
if keyspaceID & ^constant.ValidKeyspaceIDMask != 0 {
139-
return constant.NullKeyspaceID, nil
161+
return constant.NullKeyspaceID, "<null_keyspace>", nil
140162
}
141163

142164
keyspaceMeta, err := m.keyspaceManager.LoadKeyspaceByID(keyspaceID)
143165
if err != nil {
144-
return 0, err
166+
return 0, "", err
145167
}
146168
if keyspaceMeta.Config[keyspace.GCManagementType] != keyspace.KeyspaceLevelGC {
147169
if isUserAPI {
148170
// The user API is expected to always work. Operate on the state of unified GC instead.
149-
return constant.NullKeyspaceID, nil
171+
return constant.NullKeyspaceID, "<null_keyspace>", nil
150172
}
151173
// Internal API should never be called on keyspaces without keyspace level GC. They won't perform any active
152174
// GC operation and will be managed by the unified GC.
153-
return 0, errs.ErrGCOnInvalidKeyspace.GenWithStackByArgs(keyspaceID)
175+
return 0, "", errs.ErrGCOnInvalidKeyspace.GenWithStackByArgs(keyspaceMeta.GetName(), keyspaceID)
154176
}
155177

156-
return keyspaceID, nil
178+
return keyspaceID, keyspaceMeta.GetName(), nil
157179
}
158180

159181
// CompatibleLoadGCSafePoint loads current GC safe point from storage for the legacy GC API `GetGCSafePoint`.
160182
func (m *GCStateManager) CompatibleLoadGCSafePoint(keyspaceID uint32) (uint64, error) {
161-
keyspaceID, err := m.redirectKeyspace(keyspaceID, false)
183+
keyspaceID, _, err := m.redirectKeyspace(keyspaceID, false)
162184
if err != nil {
163185
return 0, err
164186
}
@@ -174,29 +196,36 @@ func (m *GCStateManager) CompatibleLoadGCSafePoint(keyspaceID uint32) (uint64, e
174196
// have the responsibility to manage GC. It can only be called on NullKeyspace or keyspaces with keyspace level GC
175197
// enabled.
176198
func (m *GCStateManager) AdvanceGCSafePoint(keyspaceID uint32, target uint64) (oldGCSafePoint uint64, newGCSafePoint uint64, err error) {
177-
keyspaceID, err = m.redirectKeyspace(keyspaceID, false)
199+
keyspaceID, keyspaceNameForDiag, err := m.redirectKeyspace(keyspaceID, false)
178200
if err != nil {
179201
return
180202
}
203+
ctx := addKeyspaceNameToCtx(context.Background(), keyspaceNameForDiag)
181204

182205
m.mu.Lock()
183206
defer m.mu.Unlock()
184207

185-
return m.advanceGCSafePointImpl(keyspaceID, target, false)
208+
return m.advanceGCSafePointImpl(ctx, keyspaceID, target, false)
186209
}
187210

188211
// CompatibleUpdateGCSafePoint tries to advance the GC safe point to the given target. If the target is less than the
189212
// current value, it returns the current value without updating it.
190213
// This is provided for compatibility purpose, making the existing uses of the deprecated API `UpdateGCSafePoint`
191214
// still work.
192215
func (m *GCStateManager) CompatibleUpdateGCSafePoint(keyspaceID uint32, target uint64) (oldGCSafePoint uint64, newGCSafePoint uint64, err error) {
216+
keyspaceID, keyspaceNameForDiag, err := m.redirectKeyspace(keyspaceID, false)
217+
if err != nil {
218+
return
219+
}
220+
ctx := addKeyspaceNameToCtx(context.Background(), keyspaceNameForDiag)
221+
193222
m.mu.Lock()
194223
defer m.mu.Unlock()
195224

196-
return m.advanceGCSafePointImpl(keyspaceID, target, true)
225+
return m.advanceGCSafePointImpl(ctx, keyspaceID, target, true)
197226
}
198227

199-
func (m *GCStateManager) advanceGCSafePointImpl(keyspaceID uint32, target uint64, compatible bool) (oldGCSafePoint uint64, newGCSafePoint uint64, err error) {
228+
func (m *GCStateManager) advanceGCSafePointImpl(ctx context.Context, keyspaceID uint32, target uint64, compatible bool) (oldGCSafePoint uint64, newGCSafePoint uint64, err error) {
200229
newGCSafePoint = target
201230

202231
err = m.gcMetaStorage.RunInGCStateTransaction(func(wb *endpoint.GCStateWriteBatch) error {
@@ -210,6 +239,7 @@ func (m *GCStateManager) advanceGCSafePointImpl(keyspaceID uint32, target uint64
210239
// When in compatible mode, trying to update the safe point to a smaller value fails silently, returning
211240
// the actual value. There exist some use cases that fetches the current value by passing zero.
212241
log.Warn("deprecated API `UpdateGCSafePoint` is called with invalid argument",
242+
zap.Uint32("keyspace-id", keyspaceID), zap.String("keyspace-name", getKeyspaceNameFromCtx(ctx)),
213243
zap.Uint64("current-gc-safe-point", oldGCSafePoint), zap.Uint64("attempted-gc-safe-point", target))
214244
newGCSafePoint = oldGCSafePoint
215245
return nil
@@ -229,19 +259,19 @@ func (m *GCStateManager) advanceGCSafePointImpl(keyspaceID uint32, target uint64
229259
})
230260
if err != nil {
231261
log.Error("failed to advance GC safe point",
232-
zap.Uint32("keyspace-id", keyspaceID),
262+
zap.Uint32("keyspace-id", keyspaceID), zap.String("keyspace-name", getKeyspaceNameFromCtx(ctx)),
233263
zap.Uint64("target", target), zap.Bool("compatible-mode", compatible), zap.Error(err))
234264
return 0, 0, err
235265
}
236266

237267
if newGCSafePoint != oldGCSafePoint {
238268
log.Info("advanced GC safe point",
239-
zap.Uint32("keyspace-id", keyspaceID),
269+
zap.Uint32("keyspace-id", keyspaceID), zap.String("keyspace-name", getKeyspaceNameFromCtx(ctx)),
240270
zap.Uint64("old-gc-safe-point", oldGCSafePoint), zap.Uint64("target", target),
241271
zap.Uint64("new-gc-safe-point", newGCSafePoint), zap.Bool("compatible-mode", compatible))
242272
} else {
243273
log.Info("GC safe point not changed after AdvanceGCSafePoint call",
244-
zap.Uint32("keyspace-id", keyspaceID),
274+
zap.Uint32("keyspace-id", keyspaceID), zap.String("keyspace-name", getKeyspaceNameFromCtx(ctx)),
245275
zap.Uint64("gc-safe-point", newGCSafePoint), zap.Uint64("target", target), zap.Bool("compatible-mode", compatible))
246276
}
247277

@@ -267,19 +297,21 @@ func (m *GCStateManager) advanceGCSafePointImpl(keyspaceID uint32, target uint64
267297
// have the responsibility to manage GC. It can only be called on NullKeyspace or keyspaces with keyspace level GC
268298
// enabled.
269299
func (m *GCStateManager) AdvanceTxnSafePoint(keyspaceID uint32, target uint64, now time.Time) (AdvanceTxnSafePointResult, error) {
270-
keyspaceID, err := m.redirectKeyspace(keyspaceID, false)
300+
keyspaceID, keyspaceNameForDiag, err := m.redirectKeyspace(keyspaceID, false)
271301
if err != nil {
272302
return AdvanceTxnSafePointResult{}, err
273303
}
304+
ctx := addKeyspaceNameToCtx(context.Background(), keyspaceNameForDiag)
305+
274306
m.mu.Lock()
275307
defer m.mu.Unlock()
276308

277-
return m.advanceTxnSafePointImpl(keyspaceID, target, now)
309+
return m.advanceTxnSafePointImpl(ctx, keyspaceID, target, now)
278310
}
279311

280312
// advanceTxnSafePointImpl is the internal implementation of AdvanceTxnSafePoint, assuming keyspaceID has been checked
281313
// and the mutex has been acquired.
282-
func (m *GCStateManager) advanceTxnSafePointImpl(keyspaceID uint32, target uint64, now time.Time) (AdvanceTxnSafePointResult, error) {
314+
func (m *GCStateManager) advanceTxnSafePointImpl(ctx context.Context, keyspaceID uint32, target uint64, now time.Time) (AdvanceTxnSafePointResult, error) {
283315
// Marks whether it's needed to provide the compatibility for old versions.
284316
//
285317
// In old versions, every time TiDB performs GC, it updates the service safe point of "gc_worker" new txn safe
@@ -419,33 +451,34 @@ func (m *GCStateManager) advanceTxnSafePointImpl(keyspaceID uint32, target uint6
419451
BlockerDescription: blockerDesc,
420452
simulatedServiceID: simulatedServiceID,
421453
}
422-
m.logAdvancingTxnSafePoint(keyspaceID, result, minBlocker, downgradeCompatibleMode)
454+
m.logAdvancingTxnSafePoint(ctx, keyspaceID, result, minBlocker, downgradeCompatibleMode)
423455
return result, nil
424456
}
425457

426-
func (*GCStateManager) logAdvancingTxnSafePoint(keyspaceID uint32, result AdvanceTxnSafePointResult, minBlocker uint64, downgradeCompatibleMode bool) {
458+
func (*GCStateManager) logAdvancingTxnSafePoint(ctx context.Context, keyspaceID uint32, result AdvanceTxnSafePointResult, minBlocker uint64, downgradeCompatibleMode bool) {
459+
keyspaceName := getKeyspaceNameFromCtx(ctx)
427460
if result.NewTxnSafePoint != result.Target {
428461
if result.NewTxnSafePoint == minBlocker {
429462
log.Info("txn safe point advancement is being blocked",
430-
zap.Uint32("keyspace-id", keyspaceID),
463+
zap.Uint32("keyspace-id", keyspaceID), zap.String("keyspace-name", keyspaceName),
431464
zap.Uint64("old-txn-safe-point", result.OldTxnSafePoint), zap.Uint64("target", result.Target),
432465
zap.Uint64("new-txn-safe-point", result.NewTxnSafePoint), zap.String("blocker", result.BlockerDescription),
433466
zap.Bool("downgrade-compatible-mode", downgradeCompatibleMode))
434467
} else {
435468
log.Info("txn safe point advancement unable to be blocked by the minimum blocker",
436-
zap.Uint32("keyspace-id", keyspaceID),
469+
zap.Uint32("keyspace-id", keyspaceID), zap.String("keyspace-name", keyspaceName),
437470
zap.Uint64("old-txn-safe-point", result.OldTxnSafePoint), zap.Uint64("target", result.Target),
438471
zap.Uint64("new-txn-safe-point", result.NewTxnSafePoint), zap.String("blocker", result.BlockerDescription),
439472
zap.Uint64("min-blocker-ts", minBlocker), zap.Bool("downgrade-compatible-mode", downgradeCompatibleMode))
440473
}
441474
} else if result.NewTxnSafePoint > result.OldTxnSafePoint {
442475
log.Info("txn safe point advanced",
443-
zap.Uint32("keyspace-id", keyspaceID),
476+
zap.Uint32("keyspace-id", keyspaceID), zap.String("keyspace-name", keyspaceName),
444477
zap.Uint64("old-txn-safe-point", result.OldTxnSafePoint), zap.Uint64("new-txn-safe-point", result.NewTxnSafePoint),
445478
zap.Bool("downgrade-compatible-mode", downgradeCompatibleMode))
446479
} else {
447480
log.Info("txn safe point is remaining unchanged",
448-
zap.Uint32("keyspace-id", keyspaceID),
481+
zap.Uint32("keyspace-id", keyspaceID), zap.String("keyspace-name", keyspaceName),
449482
zap.Uint64("old-txn-safe-point", result.OldTxnSafePoint), zap.Uint64("new-txn-safe-point", result.NewTxnSafePoint),
450483
zap.Uint64("target", result.Target),
451484
zap.Bool("downgrade-compatible-mode", downgradeCompatibleMode))
@@ -481,18 +514,19 @@ func (m *GCStateManager) SetGCBarrier(keyspaceID uint32, barrierID string, barri
481514
return nil, errs.ErrInvalidArgument.GenWithStackByArgs("ttl", ttl)
482515
}
483516

484-
keyspaceID, err := m.redirectKeyspace(keyspaceID, true)
517+
keyspaceID, keyspaceNameForDiag, err := m.redirectKeyspace(keyspaceID, true)
485518
if err != nil {
486519
return nil, err
487520
}
521+
ctx := addKeyspaceNameToCtx(context.Background(), keyspaceNameForDiag)
488522

489523
m.mu.Lock()
490524
defer m.mu.Unlock()
491525

492-
return m.setGCBarrierImpl(keyspaceID, barrierID, barrierTS, ttl, now)
526+
return m.setGCBarrierImpl(ctx, keyspaceID, barrierID, barrierTS, ttl, now)
493527
}
494528

495-
func (m *GCStateManager) setGCBarrierImpl(keyspaceID uint32, barrierID string, barrierTS uint64, ttl time.Duration, now time.Time) (*endpoint.GCBarrier, error) {
529+
func (m *GCStateManager) setGCBarrierImpl(ctx context.Context, keyspaceID uint32, barrierID string, barrierTS uint64, ttl time.Duration, now time.Time) (*endpoint.GCBarrier, error) {
496530
// The barrier ID (or service ID of the service safe points) is reserved for keeping backward compatibility.
497531
if barrierID == keypath.GCWorkerServiceSafePointID {
498532
return nil, errs.ErrReservedGCBarrierID.GenWithStackByArgs(barrierID)
@@ -522,13 +556,13 @@ func (m *GCStateManager) setGCBarrierImpl(keyspaceID uint32, barrierID string, b
522556
})
523557
if err != nil {
524558
log.Error("failed to set GC barrier",
525-
zap.Uint32("keyspace-id", keyspaceID),
559+
zap.Uint32("keyspace-id", keyspaceID), zap.String("keyspace-name", getKeyspaceNameFromCtx(ctx)),
526560
zap.String("barrier-id", barrierID), zap.Uint64("barrier-ts", barrierTS), zap.Duration("ttl", ttl), zap.Error(err))
527561
return nil, err
528562
}
529563

530564
log.Info("GC barrier set",
531-
zap.Uint32("keyspace-id", keyspaceID),
565+
zap.Uint32("keyspace-id", keyspaceID), zap.String("keyspace-name", getKeyspaceNameFromCtx(ctx)),
532566
zap.String("barrier-id", barrierID), zap.Uint64("barrier-ts", barrierTS), zap.Duration("ttl", ttl),
533567
zap.Stringer("new-gc-barrier", newBarrier))
534568

@@ -541,18 +575,19 @@ func (m *GCStateManager) setGCBarrierImpl(keyspaceID uint32, barrierID string, b
541575
// When this method is called on a keyspace without keyspace-level GC enabled, it will be equivalent to calling it on
542576
// the NullKeyspace.
543577
func (m *GCStateManager) DeleteGCBarrier(keyspaceID uint32, barrierID string) (*endpoint.GCBarrier, error) {
544-
keyspaceID, err := m.redirectKeyspace(keyspaceID, true)
578+
keyspaceID, keyspaceNameForDiag, err := m.redirectKeyspace(keyspaceID, true)
545579
if err != nil {
546580
return nil, err
547581
}
582+
ctx := addKeyspaceNameToCtx(context.Background(), keyspaceNameForDiag)
548583

549584
m.mu.Lock()
550585
defer m.mu.Unlock()
551586

552-
return m.deleteGCBarrierImpl(keyspaceID, barrierID)
587+
return m.deleteGCBarrierImpl(ctx, keyspaceID, barrierID)
553588
}
554589

555-
func (m *GCStateManager) deleteGCBarrierImpl(keyspaceID uint32, barrierID string) (*endpoint.GCBarrier, error) {
590+
func (m *GCStateManager) deleteGCBarrierImpl(ctx context.Context, keyspaceID uint32, barrierID string) (*endpoint.GCBarrier, error) {
556591
// The barrier ID (or service ID of the service safe points) is reserved for keeping backward compatibility.
557592
if barrierID == keypath.GCWorkerServiceSafePointID {
558593
return nil, errs.ErrReservedGCBarrierID.GenWithStackByArgs(barrierID)
@@ -574,18 +609,18 @@ func (m *GCStateManager) deleteGCBarrierImpl(keyspaceID uint32, barrierID string
574609

575610
if err != nil {
576611
log.Error("failed to delete GC barrier",
577-
zap.Uint32("keyspace-id", keyspaceID),
612+
zap.Uint32("keyspace-id", keyspaceID), zap.String("keyspace-name", getKeyspaceNameFromCtx(ctx)),
578613
zap.String("barrier-id", barrierID), zap.Error(err))
579614
return nil, err
580615
}
581616

582617
if deletedBarrier == nil {
583618
log.Info("deleting a not-existing GC barrier",
584-
zap.Uint32("keyspace-id", keyspaceID),
619+
zap.Uint32("keyspace-id", keyspaceID), zap.String("keyspace-name", getKeyspaceNameFromCtx(ctx)),
585620
zap.String("barrier-id", barrierID))
586621
} else {
587622
log.Info("GC barrier deleted",
588-
zap.Uint32("keyspace-id", keyspaceID),
623+
zap.Uint32("keyspace-id", keyspaceID), zap.String("keyspace-name", getKeyspaceNameFromCtx(ctx)),
589624
zap.String("barrier-id", barrierID), zap.Stringer("deleted-gc-barrier", deletedBarrier))
590625
}
591626

@@ -637,7 +672,7 @@ func (m *GCStateManager) getGCStateInTransaction(keyspaceID uint32, _ *endpoint.
637672
// When this method is called on a keyspace without keyspace-level GC enabled, it will be equivalent to calling it on
638673
// the NullKeyspace.
639674
func (m *GCStateManager) GetGCState(keyspaceID uint32) (GCState, error) {
640-
keyspaceID, err := m.redirectKeyspace(keyspaceID, true)
675+
keyspaceID, _, err := m.redirectKeyspace(keyspaceID, true)
641676
if err != nil {
642677
return GCState{}, err
643678
}
@@ -795,7 +830,11 @@ func (m *GCStateManager) LoadAllGlobalGCBarriers() ([]*endpoint.GlobalGCBarrier,
795830
// simulate the case that the service safe point of "gc_worker" is the minimal one, and return a service safe point
796831
// with the service ID equals to "gc_worker".
797832
func (m *GCStateManager) CompatibleUpdateServiceGCSafePoint(keyspaceID uint32, serviceID string, newServiceSafePoint uint64, ttl int64, now time.Time) (minServiceSafePoint *endpoint.ServiceSafePoint, updated bool, err error) {
798-
keyspaceID, err = m.redirectKeyspace(keyspaceID, true)
833+
keyspaceID, keyspaceNameForDiag, err := m.redirectKeyspace(keyspaceID, true)
834+
if err != nil {
835+
return nil, false, err
836+
}
837+
ctx := addKeyspaceNameToCtx(context.Background(), keyspaceNameForDiag)
799838

800839
m.mu.Lock()
801840
defer m.mu.Unlock()
@@ -806,7 +845,7 @@ func (m *GCStateManager) CompatibleUpdateServiceGCSafePoint(keyspaceID uint32, s
806845
return nil, false, errors.New("TTL of gc_worker's service safe point must be infinity")
807846
}
808847

809-
res, err := m.advanceTxnSafePointImpl(keyspaceID, newServiceSafePoint, now)
848+
res, err := m.advanceTxnSafePointImpl(ctx, keyspaceID, newServiceSafePoint, now)
810849
if err != nil {
811850
return nil, false, err
812851
}
@@ -853,9 +892,9 @@ func (m *GCStateManager) CompatibleUpdateServiceGCSafePoint(keyspaceID uint32, s
853892
updated = ttl > 0 && txnSafePoint <= newServiceSafePoint
854893
default:
855894
if ttl > 0 {
856-
_, err = m.setGCBarrierImpl(keyspaceID, serviceID, newServiceSafePoint, typeutil.SaturatingStdDurationFromSeconds(ttl), now)
895+
_, err = m.setGCBarrierImpl(ctx, keyspaceID, serviceID, newServiceSafePoint, typeutil.SaturatingStdDurationFromSeconds(ttl), now)
857896
} else {
858-
_, err = m.deleteGCBarrierImpl(keyspaceID, serviceID)
897+
_, err = m.deleteGCBarrierImpl(ctx, keyspaceID, serviceID)
859898
}
860899

861900
if err != nil && !errors.Is(err, errs.ErrGCBarrierTSBehindTxnSafePoint) {

0 commit comments

Comments
 (0)