Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@

*.rst text eol=lf
*.md text eol=lf
*.csv text eol=lf
*.csv text eol=lf
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -165,4 +165,4 @@ Rplots.pdf

# nsys
*.nsys-rep
rmm_log.dev*
rmm_log.dev*
6 changes: 3 additions & 3 deletions CONTRIBUTORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ Contributors of DMLC/XGBoost
============================
XGBoost has been developed and used by a group of active community members. Everyone is more than welcome to contribute, which is a great way to make the project better and more accessible to more users.

Project Management Committee(PMC)
Project Management Committee(PMC)
----------
The Project Management Committee (PMC) consists of a group of active committers that moderate the discussion, manage the project release, and propose new committer/PMC members.
The Project Management Committee (PMC) consists of a group of active committers that moderate the discussion, manage the project release, and propose new committer/PMC members.

* [Tianqi Chen](https://github.com/tqchen), University of Washington
- Tianqi is a Ph.D. student working on large-scale machine learning. He is the creator of the project.
Expand All @@ -19,7 +19,7 @@ The Project Management Committee(PMC) consists group of active committers that m
* [Hyunsu Cho](http://hyunsu-cho.io/), NVIDIA
- Hyunsu is the maintainer of the XGBoost Python package. He also manages the Jenkins continuous integration system (https://xgboost-ci.net/). He is the initial author of the CPU 'hist' updater.
* [Rory Mitchell](https://github.com/RAMitchell), University of Waikato
- Rory is a Ph.D. student at University of Waikato. He is the original creator of the GPU training algorithms. He improved the CMake build system and continuous integration.
- Rory is a Ph.D. student at University of Waikato. He is the original creator of the GPU training algorithms. He improved the CMake build system and continuous integration.
* [Hongliang Liu](https://github.com/phunterlau)


Expand Down
2 changes: 1 addition & 1 deletion cmake/RPackageInstall.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,4 @@ set(XGB_DEPS_SCRIPT
check_call(COMMAND "${LIBR_EXECUTABLE}" -q -e "${XGB_DEPS_SCRIPT}")

# Install the XGBoost R package
check_call(COMMAND "${LIBR_EXECUTABLE}" CMD INSTALL --no-multiarch --build "${build_dir}/R-package")
check_call(COMMAND "${LIBR_EXECUTABLE}" CMD INSTALL --no-multiarch --build "${build_dir}/R-package")
69 changes: 39 additions & 30 deletions demo/aft_survival/aft_survival_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,54 +9,63 @@

import numpy as np
import pandas as pd
from sklearn.model_selection import ShuffleSplit

import xgboost as xgb
from sklearn.model_selection import ShuffleSplit

# The Veterans' Administration Lung Cancer Trial
# The Statistical Analysis of Failure Time Data by Kalbfleisch J. and Prentice R (1980)
CURRENT_DIR = os.path.dirname(__file__)
df = pd.read_csv(os.path.join(CURRENT_DIR, '../data/veterans_lung_cancer.csv'))
print('Training data:')
df = pd.read_csv(os.path.join(CURRENT_DIR, "../data/veterans_lung_cancer.csv"))
print("Training data:")
print(df)

# Split features and labels
y_lower_bound = df['Survival_label_lower_bound']
y_upper_bound = df['Survival_label_upper_bound']
X = df.drop(['Survival_label_lower_bound', 'Survival_label_upper_bound'], axis=1)
y_lower_bound = df["Survival_label_lower_bound"]
y_upper_bound = df["Survival_label_upper_bound"]
X = df.drop(["Survival_label_lower_bound", "Survival_label_upper_bound"], axis=1)

# Split data into training and validation sets
rs = ShuffleSplit(n_splits=2, test_size=.7, random_state=0)
rs = ShuffleSplit(n_splits=2, test_size=0.7, random_state=0)
train_index, valid_index = next(rs.split(X))
dtrain = xgb.DMatrix(X.values[train_index, :])
dtrain.set_float_info('label_lower_bound', y_lower_bound[train_index])
dtrain.set_float_info('label_upper_bound', y_upper_bound[train_index])
dtrain.set_float_info("label_lower_bound", y_lower_bound[train_index])
dtrain.set_float_info("label_upper_bound", y_upper_bound[train_index])
dvalid = xgb.DMatrix(X.values[valid_index, :])
dvalid.set_float_info('label_lower_bound', y_lower_bound[valid_index])
dvalid.set_float_info('label_upper_bound', y_upper_bound[valid_index])
dvalid.set_float_info("label_lower_bound", y_lower_bound[valid_index])
dvalid.set_float_info("label_upper_bound", y_upper_bound[valid_index])

# Train gradient boosted trees using AFT loss and metric
params = {'verbosity': 0,
'objective': 'survival:aft',
'eval_metric': 'aft-nloglik',
'tree_method': 'hist',
'learning_rate': 0.05,
'aft_loss_distribution': 'normal',
'aft_loss_distribution_scale': 1.20,
'max_depth': 6,
'lambda': 0.01,
'alpha': 0.02}
bst = xgb.train(params, dtrain, num_boost_round=10000,
evals=[(dtrain, 'train'), (dvalid, 'valid')],
early_stopping_rounds=50)
params = {
"verbosity": 0,
"objective": "survival:aft",
"eval_metric": "aft-nloglik",
"tree_method": "hist",
"learning_rate": 0.05,
"aft_loss_distribution": "normal",
"aft_loss_distribution_scale": 1.20,
"max_depth": 6,
"lambda": 0.01,
"alpha": 0.02,
}
bst = xgb.train(
params,
dtrain,
num_boost_round=10000,
evals=[(dtrain, "train"), (dvalid, "valid")],
early_stopping_rounds=50,
)

# Run prediction on the validation set
df = pd.DataFrame({'Label (lower bound)': y_lower_bound[valid_index],
'Label (upper bound)': y_upper_bound[valid_index],
'Predicted label': bst.predict(dvalid)})
df = pd.DataFrame(
{
"Label (lower bound)": y_lower_bound[valid_index],
"Label (upper bound)": y_upper_bound[valid_index],
"Predicted label": bst.predict(dvalid),
}
)
print(df)
# Show only data points with right-censored labels
print(df[np.isinf(df['Label (upper bound)'])])
print(df[np.isinf(df["Label (upper bound)"])])

# Save trained model
bst.save_model('aft_model.json')
bst.save_model("aft_model.json")
106 changes: 68 additions & 38 deletions demo/aft_survival/aft_survival_demo_with_optuna.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,78 +6,108 @@
using Optuna to tune hyperparameters

"""

import numpy as np
import optuna
import pandas as pd
from sklearn.model_selection import ShuffleSplit

import xgboost as xgb
from sklearn.model_selection import ShuffleSplit

# The Veterans' Administration Lung Cancer Trial
# The Statistical Analysis of Failure Time Data by Kalbfleisch J. and Prentice R (1980)
df = pd.read_csv('../data/veterans_lung_cancer.csv')
print('Training data:')
df = pd.read_csv("../data/veterans_lung_cancer.csv")
print("Training data:")
print(df)

# Split features and labels
y_lower_bound = df['Survival_label_lower_bound']
y_upper_bound = df['Survival_label_upper_bound']
X = df.drop(['Survival_label_lower_bound', 'Survival_label_upper_bound'], axis=1)
y_lower_bound = df["Survival_label_lower_bound"]
y_upper_bound = df["Survival_label_upper_bound"]
X = df.drop(["Survival_label_lower_bound", "Survival_label_upper_bound"], axis=1)

# Split data into training and validation sets
rs = ShuffleSplit(n_splits=2, test_size=.7, random_state=0)
rs = ShuffleSplit(n_splits=2, test_size=0.7, random_state=0)
train_index, valid_index = next(rs.split(X))
dtrain = xgb.DMatrix(X.values[train_index, :])
dtrain.set_float_info('label_lower_bound', y_lower_bound[train_index])
dtrain.set_float_info('label_upper_bound', y_upper_bound[train_index])
dtrain.set_float_info("label_lower_bound", y_lower_bound[train_index])
dtrain.set_float_info("label_upper_bound", y_upper_bound[train_index])
dvalid = xgb.DMatrix(X.values[valid_index, :])
dvalid.set_float_info('label_lower_bound', y_lower_bound[valid_index])
dvalid.set_float_info('label_upper_bound', y_upper_bound[valid_index])
dvalid.set_float_info("label_lower_bound", y_lower_bound[valid_index])
dvalid.set_float_info("label_upper_bound", y_upper_bound[valid_index])

# Define hyperparameter search space
base_params = {'verbosity': 0,
'objective': 'survival:aft',
'eval_metric': 'aft-nloglik',
'tree_method': 'hist'} # Hyperparameters common to all trials
base_params = {
"verbosity": 0,
"objective": "survival:aft",
"eval_metric": "aft-nloglik",
"tree_method": "hist",
} # Hyperparameters common to all trials


def objective(trial):
params = {'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 1.0),
'aft_loss_distribution': trial.suggest_categorical('aft_loss_distribution',
['normal', 'logistic', 'extreme']),
'aft_loss_distribution_scale': trial.suggest_loguniform('aft_loss_distribution_scale', 0.1, 10.0),
'max_depth': trial.suggest_int('max_depth', 3, 8),
'lambda': trial.suggest_loguniform('lambda', 1e-8, 1.0),
'alpha': trial.suggest_loguniform('alpha', 1e-8, 1.0)} # Search space
params = {
"learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 1.0),
"aft_loss_distribution": trial.suggest_categorical(
"aft_loss_distribution", ["normal", "logistic", "extreme"]
),
"aft_loss_distribution_scale": trial.suggest_loguniform(
"aft_loss_distribution_scale", 0.1, 10.0
),
"max_depth": trial.suggest_int("max_depth", 3, 8),
"lambda": trial.suggest_loguniform("lambda", 1e-8, 1.0),
"alpha": trial.suggest_loguniform("alpha", 1e-8, 1.0),
} # Search space
params.update(base_params)
pruning_callback = optuna.integration.XGBoostPruningCallback(trial, 'valid-aft-nloglik')
bst = xgb.train(params, dtrain, num_boost_round=10000,
evals=[(dtrain, 'train'), (dvalid, 'valid')],
early_stopping_rounds=50, verbose_eval=False, callbacks=[pruning_callback])
pruning_callback = optuna.integration.XGBoostPruningCallback(
trial, "valid-aft-nloglik"
)
bst = xgb.train(
params,
dtrain,
num_boost_round=10000,
evals=[(dtrain, "train"), (dvalid, "valid")],
early_stopping_rounds=50,
verbose_eval=False,
callbacks=[pruning_callback],
)
if bst.best_iteration >= 25:
return bst.best_score
else:
return np.inf # Reject models with < 25 trees


# Run hyperparameter search
study = optuna.create_study(direction='minimize')
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=200)
print('Completed hyperparameter tuning with best aft-nloglik = {}.'.format(study.best_trial.value))
print(
"Completed hyperparameter tuning with best aft-nloglik = {}.".format(
study.best_trial.value
)
)
params = {}
params.update(base_params)
params.update(study.best_trial.params)

# Re-run training with the best hyperparameter combination
print('Re-running the best trial... params = {}'.format(params))
bst = xgb.train(params, dtrain, num_boost_round=10000,
evals=[(dtrain, 'train'), (dvalid, 'valid')],
early_stopping_rounds=50)
print("Re-running the best trial... params = {}".format(params))
bst = xgb.train(
params,
dtrain,
num_boost_round=10000,
evals=[(dtrain, "train"), (dvalid, "valid")],
early_stopping_rounds=50,
)

# Run prediction on the validation set
df = pd.DataFrame({'Label (lower bound)': y_lower_bound[valid_index],
'Label (upper bound)': y_upper_bound[valid_index],
'Predicted label': bst.predict(dvalid)})
df = pd.DataFrame(
{
"Label (lower bound)": y_lower_bound[valid_index],
"Label (upper bound)": y_upper_bound[valid_index],
"Predicted label": bst.predict(dvalid),
}
)
print(df)
# Show only data points with right-censored labels
print(df[np.isinf(df['Label (upper bound)'])])
print(df[np.isinf(df["Label (upper bound)"])])

# Save trained model
bst.save_model('aft_best_model.json')
bst.save_model("aft_best_model.json")
1 change: 0 additions & 1 deletion demo/aft_survival/aft_survival_viz_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

import matplotlib.pyplot as plt
import numpy as np

import xgboost as xgb

plt.rcParams.update({"font.size": 13})
Expand Down
2 changes: 1 addition & 1 deletion demo/c-api/external-memory/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@ In the example, we define a custom data iterator with 2 methods: `reset` and `ne
its end, and the `reset` method resets iterations. One important detail when using the C
API for data iterator is users need to make sure that the data passed into `next` method
must be kept in memory until the next iteration or `reset` is called. The external memory
DMatrix is not limited to training, but also valid for other features like prediction.
DMatrix is not limited to training, but also valid for other features like prediction.
1 change: 0 additions & 1 deletion demo/dask/cpu_survival.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import dask.array as da
import dask.dataframe as dd
from dask.distributed import Client, LocalCluster

from xgboost import dask as dxgb
from xgboost.dask import DaskDMatrix

Expand Down
1 change: 0 additions & 1 deletion demo/dask/cpu_training.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

from dask import array as da
from dask.distributed import Client, LocalCluster

from xgboost import dask as dxgb
from xgboost.dask import DaskDMatrix

Expand Down
5 changes: 2 additions & 3 deletions demo/dask/dask_callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,11 @@
from typing import Any

import numpy as np
import xgboost as xgb
import xgboost.dask as dxgb
from dask.distributed import Client, LocalCluster
from dask_ml.datasets import make_regression
from dask_ml.model_selection import train_test_split

import xgboost as xgb
import xgboost.dask as dxgb
from xgboost.dask import DaskDMatrix


Expand Down
1 change: 0 additions & 1 deletion demo/dask/dask_learning_to_rank.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
from dask import dataframe as dd
from distributed import Client, LocalCluster, wait
from sklearn.datasets import load_svmlight_file

from xgboost import dask as dxgb


Expand Down
1 change: 0 additions & 1 deletion demo/dask/forward_logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
from dask import array as da
from dask_cuda import LocalCUDACluster
from distributed import Client

from xgboost import dask as dxgb
from xgboost.callback import EvaluationMonitor

Expand Down
1 change: 0 additions & 1 deletion demo/dask/gpu_training.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from dask import dataframe as dd
from dask.distributed import Client
from dask_cuda import LocalCUDACluster

from xgboost import dask as dxgb
from xgboost.dask import DaskDMatrix

Expand Down
1 change: 0 additions & 1 deletion demo/dask/sklearn_cpu_training.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

from dask import array as da
from dask.distributed import Client, LocalCluster

from xgboost import dask as dxgb


Expand Down
1 change: 0 additions & 1 deletion demo/dask/sklearn_gpu_training.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

# It's recommended to use dask_cuda for GPU assignment
from dask_cuda import LocalCUDACluster

from xgboost import dask as dxgb


Expand Down
Loading