Note
Go to the end to download the full example code.
Demo for creating customized multi-class objective function
This demo applies only to XGBoost versions later than 1.0.0 (1.0.0 itself is excluded): up to and including that version, XGBoost returned transformed predictions for multi-class objective functions. More details are in the code comments.
See the "Custom Objective and Evaluation Metric" and "Advanced Usage of Custom Objectives" tutorials for a detailed walkthrough and notes.
# pylint: disable=missing-function-docstring,redefined-outer-name,unused-variable
import argparse
from typing import Dict, Tuple
import numpy as np
import xgboost as xgb
from matplotlib import pyplot as plt
# Fix the seed so the random demo data (and therefore the run) is reproducible.
np.random.seed(1994)

kRows = 100  # number of samples
kCols = 10  # number of features
kClasses = 4  # number of classes
kRounds = 10  # number of boosting rounds.

# Generate some random data for demo.
X = np.random.randn(kRows, kCols)
# Labels are drawn from [0, kClasses) so they always agree with the
# `num_class` value used for training.
y = np.random.randint(0, kClasses, size=kRows)
m = xgb.DMatrix(X, y)
def softmax(x: np.ndarray) -> np.ndarray:
    """Softmax function with x as input vector.

    The maximum score is subtracted before exponentiation.  Softmax is
    invariant to adding a constant to every score, so the result is
    mathematically unchanged, but `exp` can no longer overflow for large
    inputs (which would otherwise yield ``inf / inf == nan``).

    Parameters
    ----------
    x :
        Vector of raw scores.

    Returns
    -------
    Vector of the same length as `x` whose entries sum to 1.  An empty input
    yields an empty output.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalise; `np.max` would raise on an empty vector.
        return np.exp(x)
    e = np.exp(x - np.max(x))
    return e / np.sum(e)
def softprob_obj(predt: np.ndarray, data: xgb.DMatrix) -> Tuple[np.ndarray, np.ndarray]:
    """Loss function.  Computing the gradient and upper bound on the
    Hessian with a diagonal structure for XGBoost (note that this is
    not the true Hessian).

    Reimplements the `multi:softprob` inside XGBoost.

    Parameters
    ----------
    predt :
        Raw predictions of shape (rows, classes).
    data :
        Matrix the predictions were made for; labels and (optional) sample
        weights are read from it.

    Returns
    -------
    grad, hess :
        Two arrays with the same shape as `predt`.
    """
    labels = data.get_label()
    n_rows = labels.shape[0]

    weights = data.get_weight()
    if weights.size == 0:
        # Use 1 as weight if we don't have custom weight.
        weights = np.ones(n_rows, dtype=float)

    # The prediction is of shape (rows, classes), each element in a row
    # represents a raw prediction (leaf weight, hasn't gone through softmax
    # yet).  In XGBoost 1.0.0, the prediction is transformed by a softmax
    # function, fixed in later versions.
    assert predt.shape == (n_rows, kClasses)

    grad = np.zeros(predt.shape, dtype=float)
    hess = np.zeros(predt.shape, dtype=float)

    eps = 1e-6

    # Compute the gradient and Hessian upper bound one row at a time.  Looping
    # over rows in Python is slow and only suitable for a demo.  The numerical
    # robustness of the probabilities depends entirely on `softmax`.
    for r in range(n_rows):
        target = int(labels[r])
        assert 0 <= target < kClasses
        weight = float(weights[r])
        # Work in float64 regardless of the dtype of `predt`.
        p = softmax(predt[r, :]).astype(np.float64)

        # Gradient of the softmax cross entropy: p - onehot(target).
        g = p.copy()
        g[target] -= 1.0
        grad[r, :] = g * weight
        # Clamp from below so the Hessian never becomes (numerically) zero.
        hess[r, :] = np.maximum(2.0 * p * (1.0 - p) * weight, eps)

    # After 2.1.0, pass the gradient as it is.
    return grad, hess
def predict(booster: xgb.Booster, X: xgb.DMatrix) -> np.ndarray:
    """A customized prediction function that converts raw prediction to
    target class.

    Parameters
    ----------
    booster :
        Trained model.
    X :
        Data to predict on.

    Returns
    -------
    Float array with one predicted class index per row of the raw
    prediction matrix.
    """
    # Output margin means we want to obtain the raw prediction obtained from
    # tree leaf weight.
    predt = booster.predict(X, output_margin=True)
    # The class with the maximum score.  The scores are not probabilities as
    # they haven't gone through softmax yet (so they don't sum to 1), but the
    # argmax is the same.  The output length follows the prediction matrix
    # rather than a fixed row count, so any input size works.
    return np.argmax(predt, axis=1).astype(np.float64)
def merror(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, np.float64]:
    """Multi-class classification error computed from raw predictions.

    Parameters
    ----------
    predt :
        Predictions of shape (rows, classes).
    dtrain :
        Matrix the predictions were made for; its labels are the ground truth.

    Returns
    -------
    The metric name ``"PyMError"`` and the fraction of rows whose highest
    scoring class differs from the label.
    """
    y = dtrain.get_label()
    n_rows = y.shape[0]
    # Like custom objective, the predt is untransformed leaf weight when custom
    # objective is provided.

    # With the use of `custom_metric` parameter in train function, custom metric
    # receives raw input only when custom objective is also being used.  Otherwise
    # custom metric will receive transformed prediction.
    assert predt.shape == (n_rows, kClasses)
    # Sizes are taken from the data itself so the metric also works for
    # evaluation sets whose row count differs from the training set.
    out = np.argmax(predt, axis=1).astype(np.float64)

    assert y.shape == out.shape

    errors = (y != out).astype(np.float64)
    return "PyMError", np.sum(errors) / n_rows
def plot_history(
    custom_results: Dict[str, Dict], native_results: Dict[str, Dict]
) -> None:
    """Plot the training error history of both runs, one subplot per run.

    Parameters
    ----------
    custom_results :
        Evaluation history of the custom-objective run; ``["train"]["PyMError"]``
        is plotted.
    native_results :
        Evaluation history of the built-in run; ``["train"]["merror"]`` is
        plotted.
    """
    # Look the histories up first so a missing key fails before a figure is
    # created.
    custom_history = custom_results["train"]["PyMError"]
    native_history = native_results["train"]["merror"]

    axs: np.ndarray
    _, axs = plt.subplots(2, 1)  # type: ignore

    panels = (
        (axs[0], custom_history, "Custom objective"),
        (axs[1], native_history, "multi:softmax"),
    )
    for ax, history, label in panels:
        # One x position per recorded entry, so the plot follows the history
        # length instead of a fixed number of rounds.
        ax.plot(np.arange(len(history)), history, label=label)
        ax.legend()

    plt.show()
def main(args: argparse.Namespace) -> None:
    """Train once with the custom objective and once with the built-in
    `multi:softmax`, then check that both runs agree.

    Parameters
    ----------
    args :
        Parsed command line arguments; the evaluation history is plotted
        unless ``args.plot`` is 0.
    """
    # Since 3.1, XGBoost can estimate the base_score automatically for built-in
    # multi-class objectives.
    #
    # It is specified explicitly here to disable the automatic estimation, so
    # that the custom implementation and the built-in implementation can be
    # compared properly.
    intercept = np.full(shape=(kClasses,), fill_value=1 / kClasses)
    shared_params = {"num_class": kClasses, "base_score": intercept}

    # Run 1: our custom objective function together with the custom metric.
    custom_results: Dict[str, Dict] = {}
    custom_params = {**shared_params, "disable_default_eval_metric": True}
    booster_custom = xgb.train(
        custom_params,
        m,
        num_boost_round=kRounds,
        obj=softprob_obj,
        custom_metric=merror,
        evals_result=custom_results,
        evals=[(m, "train")],
    )
    predt_custom = predict(booster_custom, m)

    # Run 2: the same objective function as defined inside XGBoost.
    native_results: Dict[str, Dict] = {}
    native_params = {
        **shared_params,
        "objective": "multi:softmax",
        "eval_metric": "merror",
    }
    booster_native = xgb.train(
        native_params,
        m,
        num_boost_round=kRounds,
        evals_result=native_results,
        evals=[(m, "train")],
    )
    predt_native = booster_native.predict(m)

    # The custom objective reimplements the loss function in XGBoost, so the
    # two runs should be the same for normal cases.
    assert np.all(predt_custom == predt_native)
    custom_history = custom_results["train"]["PyMError"]
    native_history = native_results["train"]["merror"]
    np.testing.assert_allclose(custom_history, native_history)

    if args.plot == 0:
        return
    plot_history(custom_results, native_results)
if __name__ == "__main__":
    # Script entry point: parse the command line and run the demo.
    parser = argparse.ArgumentParser(
        description="Arguments for custom softmax objective function demo."
    )
    # `--plot` is an integer switch; plotting is enabled unless it is 0.
    plot_option = {
        "type": int,
        "default": 1,
        "help": "Set to 0 to disable plotting the evaluation history.",
    }
    parser.add_argument("--plot", **plot_option)
    args = parser.parse_args()
    main(args)