Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 12 additions & 12 deletions demo/guide-python/multioutput_reduced_gradient.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,13 @@
from typing import Tuple

import numpy as np
import xgboost as xgb
from sklearn.base import BaseEstimator
from sklearn.datasets import make_regression

import xgboost as xgb
from xgboost.objective import TreeObjective
from xgboost.objective import Objective


class LsObjMean(TreeObjective):
class LsObjMean(Objective):
"""Least squared error. Reduce the size of the gradient using mean value."""

def __init__(self, device: str) -> None:
Expand All @@ -39,12 +38,12 @@ def __call__(
if self.device == "cpu":
hess = np.ones(grad.shape)
return grad, hess
else:
import cupy as cp

hess = cp.ones(grad.shape)
import cupy as cp

hess = cp.ones(grad.shape)

return cp.array(grad), cp.array(hess)
return cp.array(grad), cp.array(hess)

def split_grad(
self, iteration: int, grad: np.ndarray, hess: np.ndarray
Expand All @@ -59,7 +58,7 @@ def split_grad(
return sgrad, shess


def svd_class(device: str) -> BaseEstimator:
def svd_class() -> BaseEstimator:
"""One of the methods in the sketch boost paper."""
from sklearn.decomposition import TruncatedSVD

Expand All @@ -77,10 +76,10 @@ def __init__(self, device: str) -> None:
def split_grad(
self, iteration: int, grad: np.ndarray, hess: np.ndarray
) -> Tuple[np.ndarray, np.ndarray]:
svd = svd_class(self.device)
svd = svd_class()
if self.device == "cuda":
grad = grad.get() # type: ignore
hess = hess.get() # type: ignore
grad = grad.get() # type: ignore
hess = hess.get() # type: ignore

svd.fit(grad)
grad = svd.transform(grad)
Expand All @@ -95,6 +94,7 @@ def split_grad(


def main() -> None:
"""Entry point to the demo, use `--device` to choose between CPU and GPU."""
parser = argparse.ArgumentParser()
parser.add_argument("--device", choices=["cpu", "cuda"], default="cpu")
args = parser.parse_args()
Expand Down
6 changes: 3 additions & 3 deletions doc/tutorials/multioutput.rst
Original file line number Diff line number Diff line change
Expand Up @@ -106,17 +106,17 @@ function for leaf values. The `Sketch Boost` paper proposes using dimensionality
on the gradient matrix. In practice, one can also define a different but related loss with
a small gradient matrix for finding the tree structure.

To access this feature, create a custom objective that inherits from ``TreeObjective`` and
To access this feature, create a custom objective that inherits from ``Objective`` and
implement the ``split_grad`` method.

.. code-block:: python

from xgboost.objective import TreeObjective
from xgboost.objective import Objective
from cuml.decomposition import TruncatedSVD

import cupy as cp

class LsObj(TreeObjective):
class LsObj(Objective):
def __call__(self, iteration: int, y_pred, dtrain):
"""Least squared error."""
y_true = dtrain.get_label()
Expand Down
21 changes: 6 additions & 15 deletions python-package/xgboost/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@
is_pyarrow_available,
py_str,
)
from .objective import Objective, TreeObjective, _grad_arrinf
from .objective import Objective, _BuiltInObjective, _grad_arrinf, _stringify

if TYPE_CHECKING:
from pandas import DataFrame as PdDataFrame
Expand Down Expand Up @@ -2162,13 +2162,11 @@ def set_param(
elif isinstance(params, str) and value is not None:
params = [(params, value)]
for key, val in cast(Iterable[Tuple[str, str]], params):
if isinstance(val, np.ndarray):
val = val.tolist()
elif hasattr(val, "__cuda_array_interface__") and hasattr(val, "tolist"):
val = val.tolist()
if val is not None:
_check_call(
_LIB.XGBoosterSetParam(self.handle, c_str(key), c_str(str(val)))
_LIB.XGBoosterSetParam(
self.handle, c_str(key), c_str(_stringify(val))
)
)

def update(
Expand Down Expand Up @@ -2198,7 +2196,7 @@ def update(
raise TypeError(f"Invalid training matrix: {type(dtrain).__name__}")
self._assign_dmatrix_features(dtrain)

if fobj is None:
if fobj is None or isinstance(fobj, _BuiltInObjective):
_check_call(
_LIB.XGBoosterUpdateOneIter(
self.handle, ctypes.c_int(iteration), dtrain.handle
Expand Down Expand Up @@ -2280,21 +2278,14 @@ def train_one_iter(grad: NumpyOrCupy, hess: NumpyOrCupy) -> None:
vgrad: Optional[ArrayLike]
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just curious what the latency differences are between using the Python interface and the internal one. One possible simplification could be to always use the Python interface, so there is just one code path.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The internal assumes split gradient must be available, and only does an extra leaf value computation at the end of iteration if there's an extra leaf value gradient. This is easy to implement as we only need one extra step.

But that's not intuitive to users since the algorithm creates an extra "split gradient", at least that's the mental model. So, the interface assumes a value gradient is available, as in normal gradient boosting. The assumption is switched here.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I misread the original question. There's no difference in latency; it's just setting a parameter. Yes, I think a single code path would be super nice.

vhess: Optional[ArrayLike]

if isinstance(fobj, TreeObjective):
# full gradient for leaf values
if isinstance(fobj, Objective):
vgrad, vhess = fobj(iteration, y_pred, dtrain)
# Reduced gradient for split nodes
split_grad = fobj.split_grad(iteration, vgrad, vhess)
# Switch the role of gradient if there's no split gradient but the tree
# objective is used.
if split_grad is not None:
sgrad, shess = split_grad
else:
sgrad, shess = vgrad, vhess
vgrad, vhess = None, None
elif isinstance(fobj, Objective):
sgrad, shess = fobj(iteration, y_pred, dtrain)
vgrad, vhess = None, None
else:
# Plain callable
sgrad, shess = fobj(y_pred, dtrain)
Expand Down
183 changes: 169 additions & 14 deletions python-package/xgboost/objective.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
# pylint: disable=missing-class-docstring
"""Experimental support for a new objective interface with target dimension
reduction.


This module exposes built-in objectives such as ``reg:squarederror`` through the Python
interface, and enables users to specify parameters for some objectives, such as
``reg:quantileerror``. In addition, one can define a custom ``split_grad`` for training
vector-leaf models.

.. warning::

Do not use this module unless you want to participate in development.
Expand All @@ -11,7 +18,7 @@

import warnings
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Tuple
from typing import TYPE_CHECKING, Any, Dict, Tuple

import numpy as np

Expand Down Expand Up @@ -44,18 +51,6 @@ def __call__(
self, iteration: int, y_pred: ArrayLike, dtrain: "DMatrix"
) -> Tuple[ArrayLike, ArrayLike]: ...


class TreeObjective(Objective):
"""Base class for tree-specific custom objective functions.

.. warning::

Do not use this class unless you want to participate in development.

.. versionadded:: 3.2.0

"""

# pylint: disable=unused-argument
def split_grad(
self, iteration: int, grad: ArrayLike, hess: ArrayLike
Expand All @@ -64,8 +59,168 @@ def split_grad(
return None


class _BuiltInObjective:
"""Base class for Python wrappers of built-in C++ objective functions."""

_name: str = ""
_KNOWN_PARAMS: Dict[str, str] = {}

def __init__(self, **kwargs: Any) -> None:
self._params: Dict[str, Any] = {}
for py_name in self._KNOWN_PARAMS:
self._params[py_name] = kwargs.pop(py_name, None)
if kwargs:
raise TypeError(f"Unknown parameters for {self._name}: {list(kwargs)}")

@property
def name(self) -> str:
"""The objective name string."""
return self._name

# pylint: disable=missing-function-docstring
def flat_params(self) -> Dict[str, str]:
result: Dict[str, str] = {"objective": self._name}
for py_name, cpp_name in self._KNOWN_PARAMS.items():
value = self._params[py_name]
if value is not None:
result[cpp_name] = _stringify(value)
return result


def _stringify(value: Any) -> str:
if isinstance(value, np.ndarray):
value = value.tolist()
elif hasattr(value, "__cuda_array_interface__") and hasattr(value, "tolist"):
value = value.tolist()
return str(value)


# Regression objectives


class RegSquaredError(_BuiltInObjective):
    """Built-in ``reg:squarederror`` objective."""

    _name = "reg:squarederror"
    _KNOWN_PARAMS = {"scale_pos_weight": "scale_pos_weight"}


class RegSquaredLogError(_BuiltInObjective):
    """Built-in ``reg:squaredlogerror`` objective; takes no parameters."""

    _name = "reg:squaredlogerror"


class RegAbsoluteError(_BuiltInObjective):
    """Built-in ``reg:absoluteerror`` objective; takes no parameters."""

    _name = "reg:absoluteerror"


class RegPseudoHuberError(_BuiltInObjective):
    """Built-in ``reg:pseudohubererror`` objective.

    The Python keyword ``delta`` maps to the C++ parameter ``huber_slope``.
    """

    _name = "reg:pseudohubererror"
    _KNOWN_PARAMS = {"delta": "huber_slope"}


class RegQuantileError(_BuiltInObjective):
    """Built-in ``reg:quantileerror`` objective.

    The Python keyword ``alpha`` maps to the C++ parameter ``quantile_alpha``.
    """

    _name = "reg:quantileerror"
    _KNOWN_PARAMS = {"alpha": "quantile_alpha"}


class RegExpectileError(_BuiltInObjective):
    """Built-in ``reg:expectileerror`` objective.

    The Python keyword ``alpha`` maps to the C++ parameter ``expectile_alpha``.
    """

    _name = "reg:expectileerror"
    _KNOWN_PARAMS = {"alpha": "expectile_alpha"}


class RegTweedie(_BuiltInObjective):
    """Built-in ``reg:tweedie`` objective.

    The Python keyword ``variance_power`` maps to ``tweedie_variance_power``.
    """

    _name = "reg:tweedie"
    _KNOWN_PARAMS = {"variance_power": "tweedie_variance_power"}


class CountPoisson(_BuiltInObjective):
    """Built-in ``count:poisson`` objective."""

    _name = "count:poisson"
    _KNOWN_PARAMS = {"max_delta_step": "max_delta_step"}


# Logistic / classification objectives


class RegLogistic(_BuiltInObjective):
    """Built-in ``reg:logistic`` objective."""

    _name = "reg:logistic"
    _KNOWN_PARAMS = {"scale_pos_weight": "scale_pos_weight"}


class BinaryLogistic(_BuiltInObjective):
    """Built-in ``binary:logistic`` objective."""

    _name = "binary:logistic"
    _KNOWN_PARAMS = {"scale_pos_weight": "scale_pos_weight"}


class RegGamma(_BuiltInObjective):
    """Built-in ``reg:gamma`` objective."""

    # NOTE(review): this regression objective is grouped with the logistic ones
    # and declares scale_pos_weight — confirm the C++ implementation of
    # reg:gamma actually honours this parameter.
    _name = "reg:gamma"
    _KNOWN_PARAMS = {"scale_pos_weight": "scale_pos_weight"}


class BinaryLogitRaw(_BuiltInObjective):
    """Built-in ``binary:logitraw`` objective."""

    _name = "binary:logitraw"
    _KNOWN_PARAMS = {"scale_pos_weight": "scale_pos_weight"}


class BinaryHinge(_BuiltInObjective):
    """Built-in ``binary:hinge`` objective; takes no parameters."""

    _name = "binary:hinge"


# Multiclass objectives


class MultiSoftmax(_BuiltInObjective):
    """Built-in ``multi:softmax`` objective."""

    _name = "multi:softmax"
    _KNOWN_PARAMS = {"num_class": "num_class"}


class MultiSoftprob(_BuiltInObjective):
    """Built-in ``multi:softprob`` objective."""

    _name = "multi:softprob"
    _KNOWN_PARAMS = {"num_class": "num_class"}


# Survival objectives


class SurvivalAFT(_BuiltInObjective):
    """Built-in ``survival:aft`` objective.

    ``distribution`` maps to ``aft_loss_distribution`` and
    ``distribution_scale`` maps to ``aft_loss_distribution_scale``.
    """

    _name = "survival:aft"
    _KNOWN_PARAMS = {
        "distribution": "aft_loss_distribution",
        "distribution_scale": "aft_loss_distribution_scale",
    }


class SurvivalCox(_BuiltInObjective):
    """Built-in ``survival:cox`` objective; takes no parameters."""

    _name = "survival:cox"


# Ranking objectives


class RankNDCG(_BuiltInObjective):
    """Built-in ``rank:ndcg`` objective.

    The keywords map to the ``lambdarank_*`` parameters, plus ``exp_gain``
    which maps to ``ndcg_exp_gain``.
    """

    _name = "rank:ndcg"
    _KNOWN_PARAMS = {
        "pair_method": "lambdarank_pair_method",
        "num_pair_per_sample": "lambdarank_num_pair_per_sample",
        "unbiased": "lambdarank_unbiased",
        "exp_gain": "ndcg_exp_gain",
    }


class RankPairwise(_BuiltInObjective):
    """Built-in ``rank:pairwise`` objective.

    The keywords map to the corresponding ``lambdarank_*`` parameters.
    """

    _name = "rank:pairwise"
    _KNOWN_PARAMS = {
        "pair_method": "lambdarank_pair_method",
        "num_pair_per_sample": "lambdarank_num_pair_per_sample",
    }


class RankMAP(_BuiltInObjective):
    """Built-in ``rank:map`` objective.

    The keywords map to the corresponding ``lambdarank_*`` parameters.
    """

    _name = "rank:map"
    _KNOWN_PARAMS = {
        "pair_method": "lambdarank_pair_method",
        "num_pair_per_sample": "lambdarank_num_pair_per_sample",
    }


def _grad_arrinf(array: NumpyOrCupy, n_samples: int) -> bytes:
# Can we check for __array_interface__ instead of a specific type instead?
msg = (
"Expecting `np.ndarray` or `cupy.ndarray` for gradient and hessian."
f" Got: {type(array)}"
Expand Down
Loading
Loading