@@ -327,6 +327,11 @@ func (c *RaftCluster) InitCluster(
327327
328328// Start starts a cluster.
329329func (c * RaftCluster ) Start (s Server , bootstrap bool ) (err error ) {
330+ start := time .Now ()
331+ defer func () {
332+ raftClusterStartDuration .Observe (time .Since (start ).Seconds ())
333+ }()
334+
330335 c .Lock ()
331336 defer c .Unlock ()
332337
@@ -335,15 +340,24 @@ func (c *RaftCluster) Start(s Server, bootstrap bool) (err error) {
335340 return nil
336341 }
337342 c .isKeyspaceGroupEnabled = s .IsKeyspaceGroupEnabled ()
343+ log .Info ("[leader-ready] start to init cluster" )
344+ initClusterStart := time .Now ()
338345 err = c .InitCluster (s .GetAllocator (), s .GetPersistOptions (), s .GetHBStreams (), s .GetKeyspaceGroupManager ())
339346 if err != nil {
347+ log .Error ("[leader-ready] failed to init cluster" , errs .ZapError (err ), zap .Duration ("cost" , time .Since (initClusterStart )))
340348 return err
341349 }
350+ initClusterDuration := time .Since (initClusterStart )
351+ log .Info ("[leader-ready] init cluster completed" , zap .Duration ("cost" , initClusterDuration ))
342352 // We should not manage the TSO service when bootstrap tries to start the raft cluster.
343353 // It is controlled only by leader election.
344354 // Ref: https://github.com/tikv/pd/issues/8836
345355 if ! bootstrap {
356+ log .Info ("[leader-ready] start to check TSO service" )
357+ checkTSOStart := time .Now ()
346358 c .checkTSOService ()
359+ checkTSODuration := time .Since (checkTSOStart )
360+ log .Info ("[leader-ready] check TSO service completed" , zap .Duration ("cost" , checkTSODuration ))
347361 }
348362 defer func () {
349363 if ! bootstrap && err != nil {
@@ -358,12 +372,16 @@ func (c *RaftCluster) Start(s Server, bootstrap bool) (err error) {
358372 }
359373 failpoint .Return (err )
360374 })
375+ log .Info ("[leader-ready] start to load cluster info" )
376+ loadClusterInfoStart := time .Now ()
361377 cluster , err := c .LoadClusterInfo ()
362378 if err != nil {
379+ log .Error ("[leader-ready] failed to load cluster info" , errs .ZapError (err ), zap .Duration ("cost" , time .Since (loadClusterInfoStart )))
363380 return err
364381 }
382+ loadClusterInfoDuration := time .Since (loadClusterInfoStart )
365383 if cluster == nil {
366- log .Warn ("cluster is not bootstrapped" )
384+ log .Warn ("[leader-ready] cluster is not bootstrapped" , zap . Duration ( "cost" , loadClusterInfoDuration ) )
367385 return nil
368386 }
369387 if c .opt .IsPlacementRulesEnabled () {
@@ -372,11 +390,17 @@ func (c *RaftCluster) Start(s Server, bootstrap bool) (err error) {
372390 return err
373391 }
374392 }
393+ log .Info ("[leader-ready] load cluster info completed" , zap .Duration ("cost" , loadClusterInfoDuration ))
375394
395+ log .Info ("[leader-ready] creating region labeler" )
396+ labelerStart := time .Now ()
376397 c .regionLabeler , err = labeler .NewRegionLabeler (c .ctx , c .storage , regionLabelGCInterval )
398+ labelerDuration := time .Since (labelerStart )
377399 if err != nil {
400+ log .Error ("[leader-ready] region labeler creation failed" , zap .Error (err ), zap .Duration ("cost" , labelerDuration ))
378401 return err
379402 }
403+ log .Info ("[leader-ready] region labeler created" , zap .Duration ("cost" , labelerDuration ))
380404
381405 // create affinity manager with region labeler for key range validation and rebuild
382406 c .affinityManager , err = affinity .NewManager (c .ctx , c .storage , c , c .GetOpts (), c .regionLabeler )
@@ -385,27 +409,53 @@ func (c *RaftCluster) Start(s Server, bootstrap bool) (err error) {
385409 }
386410
387411 if ! c .IsServiceIndependent (constant .SchedulingServiceName ) {
412+ log .Info ("[leader-ready] start to observe slow store status" )
413+ observeSlowStoreStart := time .Now ()
388414 for _ , store := range c .GetStores () {
389415 storeID := store .GetID ()
390416 c .slowStat .ObserveSlowStoreStatus (storeID , store .IsSlow ())
391417 }
418+ observeSlowStoreDuration := time .Since (observeSlowStoreStart )
419+ log .Info ("[leader-ready] observe slow store status completed" , zap .Duration ("cost" , observeSlowStoreDuration ))
392420 }
421+ log .Info ("[leader-ready] start to create replication mode manager" )
422+ replicationModeStart := time .Now ()
393423 c .replicationMode , err = replication .NewReplicationModeManager (s .GetConfig ().ReplicationMode , c .storage , cluster , s )
394424 if err != nil {
425+ log .Error ("[leader-ready] failed to create replication mode manager" , errs .ZapError (err ), zap .Duration ("cost" , time .Since (replicationModeStart )))
395426 return err
396427 }
428+ replicationModeDuration := time .Since (replicationModeStart )
429+ log .Info ("[leader-ready] create replication mode manager completed" , zap .Duration ("cost" , replicationModeDuration ))
430+ log .Info ("[leader-ready] start to load external timestamp" )
431+ loadExternalTSStart := time .Now ()
397432 c .loadExternalTS ()
433+ log .Info ("[leader-ready] load external timestamp completed" , zap .Duration ("cost" , time .Since (loadExternalTSStart )))
434+ log .Info ("[leader-ready] start to load min resolved ts" )
435+ loadMinResolvedTSStart := time .Now ()
398436 c .loadMinResolvedTS ()
437+ log .Info ("[leader-ready] load min resolved ts completed" , zap .Duration ("cost" , time .Since (loadMinResolvedTSStart )))
399438
400439 if c .isKeyspaceGroupEnabled {
401440 // bootstrap keyspace group manager after starting other parts successfully.
402441 // This order avoids a stuck goroutine in keyspaceGroupManager when it fails to create raftcluster.
442+ log .Info ("[leader-ready] start to bootstrap keyspace group manager" )
443+ bootstrapKeyspaceStart := time .Now ()
403444 err = c .keyspaceGroupManager .Bootstrap (c .ctx )
404445 if err != nil {
446+ log .Error ("[leader-ready] failed to bootstrap keyspace group manager" , errs .ZapError (err ), zap .Duration ("cost" , time .Since (bootstrapKeyspaceStart )))
405447 return err
406448 }
449+ bootstrapKeyspaceDuration := time .Since (bootstrapKeyspaceStart )
450+ log .Info ("[leader-ready] bootstrap keyspace group manager completed" , zap .Duration ("cost" , bootstrapKeyspaceDuration ))
407451 }
452+ log .Info ("[leader-ready] start to check scheduling service" )
453+ checkSchedulingStart := time .Now ()
408454 c .checkSchedulingService ()
455+ checkSchedulingDuration := time .Since (checkSchedulingStart )
456+ log .Info ("[leader-ready] check scheduling service completed" , zap .Duration ("cost" , checkSchedulingDuration ))
457+ log .Info ("[leader-ready] start to start background jobs" )
458+ backgroundJobsStart := time .Now ()
409459 c .wg .Add (11 )
410460 go c .runServiceCheckJob ()
411461 go c .runMetricsCollectionJob ()
@@ -418,12 +468,18 @@ func (c *RaftCluster) Start(s Server, bootstrap bool) (err error) {
418468 go c .startGCTuner ()
419469 go c .startProgressGC ()
420470 go c .runStorageSizeCollector (s .GetMeteringWriter (), c .regionLabeler , s .GetKeyspaceManager ())
471+ backgroundJobsDuration := time .Since (backgroundJobsStart )
472+ log .Info ("[leader-ready] start background jobs completed" , zap .Duration ("cost" , backgroundJobsDuration ))
421473
474+ log .Info ("[leader-ready] start to start runners" )
475+ runnersStart := time .Now ()
422476 c .running = true
423477 c .heartbeatRunner .Start (c .ctx )
424478 c .miscRunner .Start (c .ctx )
425479 c .logRunner .Start (c .ctx )
426480 c .syncRegionRunner .Start (c .ctx )
481+ runnersDuration := time .Since (runnersStart )
482+ log .Info ("[leader-ready] start runners completed" , zap .Duration ("cost" , runnersDuration ))
427483 return nil
428484}
429485
0 commit comments