@@ -334,6 +334,15 @@ func (c *RaftCluster) InitCluster(
334334
335335// Start starts a cluster.
336336func (c * RaftCluster ) Start (s Server , bootstrap bool ) (err error ) {
337+ start := time .Now ()
338+ defer func () {
339+ startType := "non-bootstrap"
340+ if bootstrap {
341+ startType = "bootstrap"
342+ }
343+ raftClusterStartDuration .WithLabelValues (startType ).Observe (time .Since (start ).Seconds ())
344+ }()
345+
337346 c .Lock ()
338347 defer c .Unlock ()
339348
@@ -342,15 +351,22 @@ func (c *RaftCluster) Start(s Server, bootstrap bool) (err error) {
342351 return nil
343352 }
344353 c .isKeyspaceGroupEnabled = s .IsKeyspaceGroupEnabled ()
354+ initClusterStart := time .Now ()
345355 err = c .InitCluster (s .GetAllocator (), s .GetPersistOptions (), s .GetHBStreams (), s .GetKeyspaceGroupManager ())
346356 if err != nil {
357+ log .Warn ("failed to initialize cluster" , errs .ZapError (err ), zap .Duration ("cost" , time .Since (initClusterStart )))
347358 return err
348359 }
360+ initClusterDuration := time .Since (initClusterStart )
361+ log .Info ("initialize cluster completed" , zap .Duration ("cost" , initClusterDuration ))
349362 // We should not manage tso service when bootstrap try to start raft cluster.
350363 // It only is controlled by leader election.
351364 // Ref: https://github.com/tikv/pd/issues/8836
352365 if ! bootstrap {
366+ checkTSOStart := time .Now ()
353367 c .checkTSOService ()
368+ checkTSODuration := time .Since (checkTSOStart )
369+ log .Info ("check TSO service completed" , zap .Duration ("cost" , checkTSODuration ))
354370 }
355371 defer func () {
356372 if ! bootstrap && err != nil {
@@ -365,25 +381,36 @@ func (c *RaftCluster) Start(s Server, bootstrap bool) (err error) {
365381 }
366382 failpoint .Return (err )
367383 })
384+ loadClusterInfoStart := time .Now ()
368385 cluster , err := c .LoadClusterInfo ()
369386 if err != nil {
387+ log .Warn ("failed to load cluster info" , errs .ZapError (err ), zap .Duration ("cost" , time .Since (loadClusterInfoStart )))
370388 return err
371389 }
372390 if cluster == nil {
373- log .Warn ("cluster is not bootstrapped" )
391+ loadClusterInfoDuration := time .Since (loadClusterInfoStart )
392+ log .Warn ("cluster is not bootstrapped" , zap .Duration ("cost" , loadClusterInfoDuration ))
374393 return nil
375394 }
376395 if c .opt .IsPlacementRulesEnabled () {
396+ ruleInitStart := time .Now ()
377397 err := c .ruleManager .Initialize (c .opt .GetMaxReplicas (), c .opt .GetLocationLabels (), c .opt .GetIsolationLevel (), false )
378398 if err != nil {
399+ log .Warn ("failed to initialize placement rules" , errs .ZapError (err ), zap .Duration ("cost" , time .Since (ruleInitStart )))
379400 return err
380401 }
402+ log .Info ("initialize placement rules completed" , zap .Duration ("cost" , time .Since (ruleInitStart )))
381403 }
382-
404+ loadClusterInfoDuration := time .Since (loadClusterInfoStart )
405+ log .Info ("load cluster info completed" , zap .Duration ("cost" , loadClusterInfoDuration ))
406+ labelerStart := time .Now ()
383407 c .regionLabeler , err = labeler .NewRegionLabeler (c .ctx , c .storage , regionLabelGCInterval )
408+ labelerDuration := time .Since (labelerStart )
384409 if err != nil {
410+ log .Warn ("region labeler creation failed" , zap .Error (err ), zap .Duration ("cost" , labelerDuration ))
385411 return err
386412 }
413+ log .Info ("region labeler created" , zap .Duration ("cost" , labelerDuration ))
387414
388415 // create affinity manager with region labeler for key range validation and rebuild
389416 c .affinityManager , err = affinity .NewManager (c .ctx , c .storage , c , c .GetOpts (), c .regionLabeler )
@@ -392,27 +419,45 @@ func (c *RaftCluster) Start(s Server, bootstrap bool) (err error) {
392419 }
393420
394421 if ! c .IsServiceIndependent (constant .SchedulingServiceName ) {
422+ observeSlowStoreStart := time .Now ()
395423 for _ , store := range c .GetStores () {
396424 storeID := store .GetID ()
397425 c .slowStat .ObserveSlowStoreStatus (storeID , store .IsSlow ())
398426 }
427+ log .Info ("observe slow store status completed" , zap .Duration ("cost" , time .Since (observeSlowStoreStart )))
399428 }
429+ replicationModeStart := time .Now ()
400430 c .replicationMode , err = replication .NewReplicationModeManager (s .GetConfig ().ReplicationMode , c .storage , cluster , s )
401431 if err != nil {
432+ log .Warn ("failed to create replication mode manager" , errs .ZapError (err ), zap .Duration ("cost" , time .Since (replicationModeStart )))
402433 return err
403434 }
435+ replicationModeDuration := time .Since (replicationModeStart )
436+ log .Info ("create replication mode manager completed" , zap .Duration ("cost" , replicationModeDuration ))
437+ loadExternalTSStart := time .Now ()
404438 c .loadExternalTS ()
439+ log .Info ("load external timestamp completed" , zap .Duration ("cost" , time .Since (loadExternalTSStart )))
440+ loadMinResolvedTSStart := time .Now ()
405441 c .loadMinResolvedTS ()
442+ log .Info ("load min resolved ts completed" , zap .Duration ("cost" , time .Since (loadMinResolvedTSStart )))
406443
407444 if c .isKeyspaceGroupEnabled {
408445 // bootstrap keyspace group manager after starting other parts successfully.
409446 // This order avoids a stuck goroutine in keyspaceGroupManager when it fails to create raftcluster.
447+ log .Info ("start to bootstrap keyspace group manager" )
448+ bootstrapKeyspaceStart := time .Now ()
410449 err = c .keyspaceGroupManager .Bootstrap (c .ctx )
411450 if err != nil {
451+ log .Warn ("failed to bootstrap keyspace group manager" , errs .ZapError (err ), zap .Duration ("cost" , time .Since (bootstrapKeyspaceStart )))
412452 return err
413453 }
454+ log .Info ("bootstrap keyspace group manager completed" , zap .Duration ("cost" , time .Since (bootstrapKeyspaceStart )))
414455 }
456+ checkSchedulingStart := time .Now ()
415457 c .checkSchedulingService ()
458+ checkSchedulingDuration := time .Since (checkSchedulingStart )
459+ log .Info ("check scheduling service completed" , zap .Duration ("cost" , checkSchedulingDuration ))
460+ backgroundJobsStart := time .Now ()
416461 c .wg .Add (11 )
417462 go c .runServiceCheckJob ()
418463 go c .runMetricsCollectionJob ()
@@ -425,12 +470,14 @@ func (c *RaftCluster) Start(s Server, bootstrap bool) (err error) {
425470 go c .startGCTuner ()
426471 go c .startProgressGC ()
427472 go c .runStorageSizeCollector (s .GetMeteringWriter (), c .regionLabeler , s .GetKeyspaceManager ())
428-
473+ log .Info ("start background jobs completed" , zap .Duration ("cost" , time .Since (backgroundJobsStart )))
474+ runnersStart := time .Now ()
429475 c .running = true
430476 c .heartbeatRunner .Start (c .ctx )
431477 c .miscRunner .Start (c .ctx )
432478 c .logRunner .Start (c .ctx )
433479 c .syncRegionRunner .Start (c .ctx )
480+ log .Info ("start runners completed" , zap .Duration ("cost" , time .Since (runnersStart )))
434481 return nil
435482}
436483
0 commit comments