Demo for using feature weight to change column sampling
Added in version 1.3.0.
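XGBoost supports assigning a weight to each feature through DMatrix.set_info(feature_weights=...). When column sampling is enabled (a colsample_bynode, colsample_bylevel, or colsample_bytree value below 1), each feature is sampled in proportion to its weight, so a feature with weight 0 is never selected. The script below builds a random dataset, gives feature i a weight of i, and checks that feature 0 never appears in the trained model.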
import argparse

import numpy as np
from matplotlib import pyplot as plt

import xgboost
def main(args: argparse.Namespace) -> None:
    rng = np.random.RandomState(1994)

    kRows = 4196
    kCols = 10

    X = rng.randn(kRows, kCols)
    y = rng.randn(kRows)

    # Feature i receives weight i, so feature 0 can never be sampled while
    # feature 9 is the most likely to be picked.
    fw = np.ones(shape=(kCols,))
    for i in range(kCols):
        fw[i] *= float(i)

    dtrain = xgboost.DMatrix(X, y)
    dtrain.set_info(feature_weights=fw)
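    # A minimal alternative sketch: the weights can also be passed directly to
    # the DMatrix constructor via its feature_weights argument, which should be
    # equivalent to calling set_info afterwards:
    #
    #   dtrain = xgboost.DMatrix(X, y, feature_weights=fw)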
    # Perform column sampling for each node split evaluation; the sampling
    # process is weighted by the feature weights.
    bst = xgboost.train(
        {"tree_method": "hist", "colsample_bynode": 0.2},
        dtrain,
        num_boost_round=10,
        evals=[(dtrain, "d")],
    )
    feature_map = bst.get_fscore()

    # Feature zero has zero weight, so it is never sampled and never appears
    # in the model.
    assert feature_map.get("f0", None) is None
    # Feature nine has the largest weight; with this seed it accumulates the
    # most splits.
    assert max(feature_map.values()) == feature_map.get("f9")

    if args.plot:
        xgboost.plot_importance(bst)
        plt.show()
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--plot",
        type=int,
        default=1,
        help="Set to 0 to disable plotting the feature importance.",
    )
    args = parser.parse_args()
    main(args)
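The same weighting is available through the scikit-learn interface. The sketch below is not part of the original demo; it assumes the wrapper's fit method accepts a feature_weights argument that behaves like DMatrix.set_info above.

import numpy as np
import xgboost

rng = np.random.RandomState(1994)
X = rng.randn(4196, 10)
y = rng.randn(4196)
fw = np.arange(10, dtype=np.float64)  # weight i for feature i

# Assumed sklearn-wrapper equivalent of the demo above.
reg = xgboost.XGBRegressor(tree_method="hist", colsample_bynode=0.2, n_estimators=10)
reg.fit(X, y, feature_weights=fw)

feature_map = reg.get_booster().get_fscore()
assert "f0" not in feature_map  # the zero-weight feature is never sampled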