Use scikit-learn regressor interface with GPU histogram tree method
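This example shows how to train XGBoost with the scikit-learn style DaskXGBRegressor on a GPU cluster. Setting device="cuda" together with the hist tree method moves training onto CUDA devices, while dask_cuda's LocalCUDACluster assigns one GPU to each Dask worker.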
import dask
from dask import array as da
from dask.distributed import Client
# It's recommended to use dask_cuda for GPU assignment
from dask_cuda import LocalCUDACluster
from xgboost import dask as dxgb
def main(client: Client) -> dxgb.Booster:
    # Generate some random data for demonstration.
    rng = da.random.default_rng(1)

    m = 2**18
    n = 100
    X = rng.uniform(size=(m, n), chunks=(128**2, -1))
    y = X.sum(axis=1)
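    # `chunks=(128**2, -1)` splits X into row blocks of 16384 samples while
    # keeping all features of a row in a single chunk; xgboost.dask expects
    # data partitioned along rows only.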
    regressor = dxgb.DaskXGBRegressor(verbosity=1)
    # Set the device to CUDA.
    regressor.set_params(tree_method="hist", device="cuda")
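    # With XGBoost 2.0+, `tree_method="hist"` plus `device="cuda"` supersedes
    # the older `gpu_hist` tree method.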
    # Assigning the client here is optional; when omitted, XGBoost uses the
    # default global client.
    regressor.client = client

    regressor.fit(X, y, eval_set=[(X, y)])
    prediction = regressor.predict(X)
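    # `predict` is lazy; no computation runs until the array is materialized,
    # e.g. via `prediction.compute()`.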
    bst = regressor.get_booster()
    history = regressor.evals_result()
    print("Evaluation history:", history)
    # The returned prediction is always a dask array.
    assert isinstance(prediction, da.Array)
    return bst  # Return the trained model.
if __name__ == "__main__":
    # With dask_cuda, one can scale up XGBoost to arbitrary GPU clusters.
    # `LocalCUDACluster` is used here only for demonstration purposes.
    with LocalCUDACluster() as cluster:
        # Create a client from the cluster and set the array backend to GPU (cupy).
        with Client(cluster) as client, dask.config.set({"array.backend": "cupy"}):
            main(client)
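For comparison, roughly the same workflow can be written with the functional interface of the xgboost.dask module. The sketch below is illustrative rather than part of the original example: the number of boosting rounds is an arbitrary choice, and DaskQuantileDMatrix is used because it reduces memory usage with the hist tree method.

def train_functional(client: Client, X: da.Array, y: da.Array) -> dxgb.Booster:
    # Construct a quantile DMatrix, which reduces memory usage for `hist`.
    dtrain = dxgb.DaskQuantileDMatrix(client, X, y)
    output = dxgb.train(
        client,
        {"tree_method": "hist", "device": "cuda"},
        dtrain,
        num_boost_round=100,  # illustrative value
        evals=[(dtrain, "train")],
    )
    booster = output["booster"]
    print("Evaluation history:", output["history"])
    # Prediction through the functional interface is lazy as well.
    prediction = dxgb.predict(client, booster, X)
    assert isinstance(prediction, da.Array)
    return booster

In either case, the trained booster can be persisted with bst.save_model("model.json") and loaded later for single-node inference.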