@@ -320,6 +320,11 @@ func (c *RaftCluster) InitCluster(
320320
321321// Start starts a cluster.
322322func (c * RaftCluster ) Start (s Server , bootstrap bool ) (err error ) {
323+ start := time .Now ()
324+ defer func () {
325+ raftClusterStartDuration .Observe (time .Since (start ).Seconds ())
326+ }()
327+
323328 c .Lock ()
324329 defer c .Unlock ()
325330
@@ -328,15 +333,24 @@ func (c *RaftCluster) Start(s Server, bootstrap bool) (err error) {
328333 return nil
329334 }
330335 c .isAPIServiceMode = s .IsAPIServiceMode ()
336+ log .Info ("[leader-ready] start to init cluster" )
337+ initClusterStart := time .Now ()
331338 err = c .InitCluster (s .GetAllocator (), s .GetPersistOptions (), s .GetHBStreams (), s .GetKeyspaceGroupManager ())
332339 if err != nil {
340+ log .Error ("[leader-ready] failed to init cluster" , errs .ZapError (err ), zap .Duration ("cost" , time .Since (initClusterStart )))
333341 return err
334342 }
343+ initClusterDuration := time .Since (initClusterStart )
344+ log .Info ("[leader-ready] init cluster completed" , zap .Duration ("cost" , initClusterDuration ))
335345 // We should not manage the TSO service when bootstrap tries to start the raft cluster.
336346 // It is controlled only by leader election.
337347 // Ref: https://github.com/tikv/pd/issues/8836
338348 if ! bootstrap {
349+ log .Info ("[leader-ready] start to check TSO service" )
350+ checkTSOStart := time .Now ()
339351 c .checkTSOService ()
352+ checkTSODuration := time .Since (checkTSOStart )
353+ log .Info ("[leader-ready] check TSO service completed" , zap .Duration ("cost" , checkTSODuration ))
340354 }
341355 defer func () {
342356 if ! bootstrap && err != nil {
@@ -354,45 +368,83 @@ func (c *RaftCluster) Start(s Server, bootstrap bool) (err error) {
354368 }
355369 failpoint .Return (err )
356370 })
371+ log .Info ("[leader-ready] start to load cluster info" )
372+ loadClusterInfoStart := time .Now ()
357373 cluster , err := c .LoadClusterInfo ()
358374 if err != nil {
375+ log .Error ("[leader-ready] failed to load cluster info" , errs .ZapError (err ), zap .Duration ("cost" , time .Since (loadClusterInfoStart )))
359376 return err
360377 }
378+ loadClusterInfoDuration := time .Since (loadClusterInfoStart )
361379 if cluster == nil {
362- log .Warn ("cluster is not bootstrapped" )
380+ log .Warn ("[leader-ready] cluster is not bootstrapped" , zap . Duration ( "cost" , loadClusterInfoDuration ) )
363381 return nil
364382 }
383+ log .Info ("[leader-ready] load cluster info completed" , zap .Duration ("cost" , loadClusterInfoDuration ))
365384
385+ log .Info ("[leader-ready] creating region labeler" )
386+ labelerStart := time .Now ()
366387 c .regionLabeler , err = labeler .NewRegionLabeler (c .ctx , c .storage , regionLabelGCInterval )
388+ labelerDuration := time .Since (labelerStart )
367389 if err != nil {
390+ log .Error ("[leader-ready] region labeler creation failed" , zap .Error (err ), zap .Duration ("cost" , labelerDuration ))
368391 return err
369392 }
393+ log .Info ("[leader-ready] region labeler created" , zap .Duration ("cost" , labelerDuration ))
370394
371395 if ! c .IsServiceIndependent (constant .SchedulingServiceName ) {
396+ log .Info ("[leader-ready] start to observe slow store status" )
397+ observeSlowStoreStart := time .Now ()
372398 for _ , store := range c .GetStores () {
373399 storeID := store .GetID ()
374400 c .slowStat .ObserveSlowStoreStatus (storeID , store .IsSlow ())
375401 }
402+ observeSlowStoreDuration := time .Since (observeSlowStoreStart )
403+ log .Info ("[leader-ready] observe slow store status completed" , zap .Duration ("cost" , observeSlowStoreDuration ))
376404 }
405+ log .Info ("[leader-ready] start to create replication mode manager" )
406+ replicationModeStart := time .Now ()
377407 c .replicationMode , err = replication .NewReplicationModeManager (s .GetConfig ().ReplicationMode , c .storage , cluster , s )
378408 if err != nil {
409+ log .Error ("[leader-ready] failed to create replication mode manager" , errs .ZapError (err ), zap .Duration ("cost" , time .Since (replicationModeStart )))
379410 return err
380411 }
412+ replicationModeDuration := time .Since (replicationModeStart )
413+ log .Info ("[leader-ready] create replication mode manager completed" , zap .Duration ("cost" , replicationModeDuration ))
414+ log .Info ("[leader-ready] start to create store limiter" )
415+ limiterStart := time .Now ()
381416 c .limiter = NewStoreLimiter (s .GetPersistOptions ())
417+ limiterDuration := time .Since (limiterStart )
418+ log .Info ("[leader-ready] create store limiter completed" , zap .Duration ("cost" , limiterDuration ))
419+ loadExternalTSStart := time .Now ()
382420 c .externalTS , err = c .storage .LoadExternalTS ()
383421 if err != nil {
384- log .Error ("load external timestamp meets error" , zap .Error (err ))
422+ log .Error ("[leader-ready] load external timestamp meets error" , zap .Error (err ), zap .Duration ("cost" , time .Since (loadExternalTSStart )))
423+ } else {
424+ loadExternalTSDuration := time .Since (loadExternalTSStart )
425+ log .Info ("[leader-ready] load external timestamp completed" , zap .Duration ("cost" , loadExternalTSDuration ))
385426 }
386427
387428 if c .isAPIServiceMode {
388429 // bootstrap keyspace group manager after starting other parts successfully.
389430 // This order avoids a stuck goroutine in keyspaceGroupManager when it fails to create raftcluster.
431+ log .Info ("[leader-ready] start to bootstrap keyspace group manager" )
432+ bootstrapKeyspaceStart := time .Now ()
390433 err = c .keyspaceGroupManager .Bootstrap (c .ctx )
391434 if err != nil {
435+ log .Error ("[leader-ready] failed to bootstrap keyspace group manager" , errs .ZapError (err ), zap .Duration ("cost" , time .Since (bootstrapKeyspaceStart )))
392436 return err
393437 }
438+ bootstrapKeyspaceDuration := time .Since (bootstrapKeyspaceStart )
439+ log .Info ("[leader-ready] bootstrap keyspace group manager completed" , zap .Duration ("cost" , bootstrapKeyspaceDuration ))
394440 }
441+ log .Info ("[leader-ready] start to check scheduling service" )
442+ checkSchedulingStart := time .Now ()
395443 c .checkSchedulingService ()
444+ checkSchedulingDuration := time .Since (checkSchedulingStart )
445+ log .Info ("[leader-ready] check scheduling service completed" , zap .Duration ("cost" , checkSchedulingDuration ))
446+ log .Info ("[leader-ready] start to start background jobs" )
447+ backgroundJobsStart := time .Now ()
396448 c .wg .Add (9 )
397449 go c .runServiceCheckJob ()
398450 go c .runMetricsCollectionJob ()
@@ -403,11 +455,17 @@ func (c *RaftCluster) Start(s Server, bootstrap bool) (err error) {
403455 go c .runStoreConfigSync ()
404456 go c .runUpdateStoreStats ()
405457 go c .startGCTuner ()
458+ backgroundJobsDuration := time .Since (backgroundJobsStart )
459+ log .Info ("[leader-ready] start background jobs completed" , zap .Duration ("cost" , backgroundJobsDuration ))
406460
461+ log .Info ("[leader-ready] start to start runners" )
462+ runnersStart := time .Now ()
407463 c .running = true
408464 c .heartbeatRunner .Start (c .ctx )
409465 c .miscRunner .Start (c .ctx )
410466 c .logRunner .Start (c .ctx )
467+ runnersDuration := time .Since (runnersStart )
468+ log .Info ("[leader-ready] start runners completed" , zap .Duration ("cost" , runnersDuration ))
411469 return nil
412470}
413471
0 commit comments