-
-
Notifications
You must be signed in to change notification settings - Fork 8.9k
xgboost 3.2.0 crashes with cudf 26.02 when there are categorical features #12138
Copy link
Copy link
Open
Description
The latest stable release of xgboost (3.2.0) does not work with cudf 26.02 when the input data contains categorical features: fitting a model raises a TypeError (full traceback below). The same code works fine with cudf 25.12.
Code to replicate:
import numpy as np
import cudf, xgboost
from xgboost import XGBRegressor
print(F'{cudf.__version__=}')
print(F'{xgboost.__version__=}')
rng = np.random.default_rng(0)
X = cudf.DataFrame(rng.random((100,10))).astype(str).astype('category')
y = cudf.Series(rng.random(100))
model = XGBRegressor(enable_categorical=True, device='cuda').fit(X, y)
Actual output:
cudf.__version__='26.02.01'
xgboost.__version__='3.2.0'
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
/tmp/ipykernel_55/3028577614.py in <cell line: 0>()
10 y = cudf.Series(rng.random(100))
11
---> 12 model = XGBRegressor(enable_categorical=True, device='cuda').fit(X, y)
/usr/local/lib/python3.12/dist-packages/xgboost/core.py in inner_f(*args, **kwargs)
749 for k, arg in zip(sig.parameters, args):
750 kwargs[k] = arg
--> 751 return func(**kwargs)
752
753 return inner_f
/usr/local/lib/python3.12/dist-packages/xgboost/sklearn.py in fit(self, X, y, sample_weight, base_margin, eval_set, verbose, xgb_model, sample_weight_eval_set, base_margin_eval_set, feature_weights)
1341
1342 evals_result: EvalsLog = {}
-> 1343 train_dmatrix, evals = _wrap_evaluation_matrices(
1344 missing=self.missing,
1345 X=X,
/usr/local/lib/python3.12/dist-packages/xgboost/sklearn.py in _wrap_evaluation_matrices(missing, X, y, group, qid, sample_weight, base_margin, feature_weights, eval_set, sample_weight_eval_set, base_margin_eval_set, eval_group, eval_qid, create_dmatrix, enable_categorical, feature_types)
698 """
699 # Feature_types contains the optional reference categories from the booster object.
--> 700 train_dmatrix = create_dmatrix(
701 data=X,
702 label=y,
/usr/local/lib/python3.12/dist-packages/xgboost/sklearn.py in _create_dmatrix(self, ref, **kwargs)
1260 except TypeError: # `QuantileDMatrix` supports lesser types than DMatrix
1261 pass
-> 1262 return DMatrix(**kwargs, nthread=self.n_jobs)
1263
1264 def _set_evaluation_result(self, evals_result: EvalsLog) -> None:
/usr/local/lib/python3.12/dist-packages/xgboost/core.py in inner_f(*args, **kwargs)
749 for k, arg in zip(sig.parameters, args):
750 kwargs[k] = arg
--> 751 return func(**kwargs)
752
753 return inner_f
/usr/local/lib/python3.12/dist-packages/xgboost/core.py in __init__(self, data, label, weight, base_margin, missing, silent, feature_names, feature_types, nthread, group, qid, label_lower_bound, label_upper_bound, feature_weights, enable_categorical, data_split_mode)
974 return
975
--> 976 handle, feature_names, feature_types = dispatch_data_backend(
977 data=data,
978 missing=self.missing,
/usr/local/lib/python3.12/dist-packages/xgboost/data.py in dispatch_data_backend(data, missing, threads, feature_names, feature_types, enable_categorical, data_split_mode)
1437 )
1438 if _is_cudf_df(data) or _is_cudf_ser(data):
-> 1439 return _from_cudf_df(
1440 data=data,
1441 missing=missing,
/usr/local/lib/python3.12/dist-packages/xgboost/data.py in _from_cudf_df(data, missing, nthread, feature_names, feature_types, enable_categorical)
1130 enable_categorical: bool,
1131 ) -> DispatchedDataBackendReturnType:
-> 1132 df, feature_names, feature_types = _transform_cudf_df(
1133 data, feature_names, feature_types, enable_categorical
1134 )
/usr/local/lib/python3.12/dist-packages/xgboost/data.py in _transform_cudf_df(data, feature_names, feature_types, enable_categorical)
1115
1116 return (
-> 1117 CudfTransformed(result, ref_categories=ref_categories),
1118 feature_names,
1119 feature_types,
/usr/local/lib/python3.12/dist-packages/xgboost/data.py in __init__(self, columns, ref_categories)
1026
1027 for col in self.columns:
-> 1028 push_series(col)
1029
1030 super().__init__(
/usr/local/lib/python3.12/dist-packages/xgboost/data.py in push_series(ser)
1017 if _is_df_cat(ser):
1018 cats, codes = ser.categories, ser.codes
-> 1019 cats_ainf, codes_ainf, buf = cudf_cat_inf(cats, codes)
1020 temporary_buffers.append(buf)
1021 aitfs.append((cats_ainf, codes_ainf))
/usr/local/lib/python3.12/dist-packages/xgboost/_data_utils.py in cudf_cat_inf(cats, codes)
596
597 # pylint: disable=protected-access
--> 598 arrow_col = cats._column.to_pylibcudf(mode="read")
599 # Tuple[types.CapsuleType, types.CapsuleType]
600 schema, array = arrow_col.__arrow_c_device_array__()
TypeError: ColumnBase.to_pylibcudf() got an unexpected keyword argument 'mode'
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels