.. DO NOT EDIT. .. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY. .. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE: .. "python/examples/prediction_intervals.py" .. LINE NUMBERS ARE GIVEN BELOW. .. only:: html .. note:: :class: sphx-glr-download-link-note :ref:`Go to the end ` to download the full example code. .. rst-class:: sphx-glr-example-title .. _sphx_glr_python_examples_prediction_intervals.py: Prediction Intervals with Quantile and Expectile Regression =========================================================== .. versionadded:: 2.0.0 The script is inspired by this awesome example in sklearn: https://scikit-learn.org/stable/auto_examples/ensemble/plot_gradient_boosting_quantile.html .. note:: The feature is only supported using the Python, R, and C packages. In addition, quantile crossing can happen due to limitation in the algorithm. This example also trains ``reg:expectileerror``. Expectiles are asymmetric means, not percentiles, but they can be used to construct tail-sensitive bands around the conditional mean. .. versionadded:: 3.3.0 .. GENERATED FROM PYTHON SOURCE LINES 22-235 .. code-block:: Python import argparse from typing import Dict, Tuple import numpy as np import xgboost as xgb from sklearn.model_selection import train_test_split Data = Tuple[xgb.QuantileDMatrix, xgb.QuantileDMatrix] Predictions = Dict[str, np.ndarray] def f(x: np.ndarray) -> np.ndarray: """The function to predict.""" return x * np.sin(x) def make_dataset(rng: np.random.RandomState) -> Tuple[np.ndarray, np.ndarray]: """Generate heteroscedastic data with asymmetric noise.""" features = np.atleast_2d(rng.uniform(0, 10.0, size=1000)).T expected_y = f(features).ravel() sigma = 0.5 + features.ravel() / 10.0 noise = rng.lognormal(sigma=sigma) - np.exp(sigma**2.0 / 2.0) target = expected_y + noise return features, target def train_interval_model( params: Dict[str, object], alpha_name: str, alpha: np.ndarray, data: Data, ) -> Tuple[xgb.Booster, Dict[str, Dict]]: """Train a multi-output interval model.""" dtrain, dtest = data evals_result: Dict[str, Dict] = {} params = params.copy() params[alpha_name] = alpha booster = xgb.train( params, dtrain, num_boost_round=64, early_stopping_rounds=4, evals=[(dtrain, "Train"), (dtest, "Test")], evals_result=evals_result, ) return booster, evals_result def squared_error_model(params: Dict[str, object], data: Data) -> xgb.Booster: """Train a squared-error model for comparison.""" dtrain, dtest = data params = params.copy() params["objective"] = "reg:squarederror" return xgb.train( params, dtrain, num_boost_round=64, early_stopping_rounds=4, evals=[(dtrain, "Train"), (dtest, "Test")], ) def base_params(cli_args: argparse.Namespace) -> Dict[str, object]: """Parameters shared by the three models in the demo.""" return { "tree_method": "hist", "learning_rate": 0.04, "max_depth": 5, "multi_strategy": cli_args.multi_strategy, "device": cli_args.device, } def train_models( data: Data, alpha: np.ndarray, params: Dict[str, object] ) -> Tuple[Dict[str, xgb.Booster], Dict[str, Dict[str, Dict]]]: """Train quantile, expectile, and squared-error models.""" quantile_model, quantile_evals = train_interval_model( {**params, "objective": "reg:quantileerror"}, "quantile_alpha", alpha, data ) expectile_model, expectile_evals = train_interval_model( {**params, "objective": "reg:expectileerror"}, "expectile_alpha", alpha, data ) models = { "quantile": quantile_model, "expectile": expectile_model, "mean": squared_error_model(params, data), } return models, {"quantile": quantile_evals, "expectile": expectile_evals} def make_predictions(models: Dict[str, xgb.Booster]) -> Tuple[np.ndarray, Predictions]: """Predict on a dense grid for plotting.""" grid = np.atleast_2d(np.linspace(0, 10, 1000)).T predictions = { "quantile": models["quantile"].inplace_predict(grid), "expectile": models["expectile"].inplace_predict(grid), "mean": models["mean"].inplace_predict(grid), } return grid, predictions def make_matrices( rng: np.random.RandomState, ) -> Tuple[Data, Tuple[np.ndarray, np.ndarray]]: """Create train/test DMatrices and return held-out data for plotting.""" features, target = make_dataset(rng) split = train_test_split(features, target, random_state=rng) train_features, test_features, train_target, test_target = split train = xgb.QuantileDMatrix(train_features, train_target) test = xgb.QuantileDMatrix(test_features, test_target, ref=train) return (train, test), (test_features, test_target) def plot_prediction_intervals( grid: np.ndarray, test_data: Tuple[np.ndarray, np.ndarray], predictions: Predictions, output: str, ) -> None: """Plot quantile interval and expectile band.""" from matplotlib import pyplot as plt features, target = test_data fig, axes = plt.subplots(2, 1, figsize=(10, 12), sharex=True, sharey=True) def plot_band( ax: "plt.Axes", pred: np.ndarray, title: str, center_label: str ) -> None: ax.plot(grid, f(grid), "g:", linewidth=3, label=r"$f(x) = x\,\sin(x)$") ax.plot( features, target, "b.", markersize=5, alpha=0.35, label="Test observations", ) ax.plot(grid, pred[:, 1], "r-", label=center_label) ax.plot(grid, predictions["mean"], "m--", label="Squared-error mean") ax.plot(grid, pred[:, 0], "k-") ax.plot(grid, pred[:, 2], "k-") ax.fill_between( grid.ravel(), pred[:, 0], pred[:, 2], alpha=0.3, label="90% band" ) ax.set_title(title) ax.set_ylabel("$y$") ax.set_ylim(-10, 25) ax.legend(loc="upper left") plot_band( axes[0], predictions["quantile"], "Quantile regression interval", "Predicted median", ) plot_band( axes[1], predictions["expectile"], "Expectile regression band", "Predicted 0.5 expectile", ) axes[1].set_xlabel("$x$") fig.tight_layout() if output: fig.savefig(output, dpi=150) else: plt.show() def prediction_intervals(cli_args: argparse.Namespace) -> None: """Train quantile and expectile interval models.""" rng = np.random.RandomState(1994) alpha = np.array([0.05, 0.5, 0.95]) data, test_data = make_matrices(rng) models, evals = train_models(data, alpha, base_params(cli_args)) grid, predictions = make_predictions(models) assert predictions["quantile"].shape == (grid.shape[0], alpha.shape[0]) assert predictions["expectile"].shape == (grid.shape[0], alpha.shape[0]) print("Quantile test metric:", evals["quantile"]["Test"]["quantile"][-1]) print("Expectile test metric:", evals["expectile"]["Test"]["expectile"][-1]) if cli_args.plot: plot_prediction_intervals(grid, test_data, predictions, cli_args.output) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( "--plot", action="store_true", help="Specify it to enable plotting the outputs.", ) parser.add_argument( "--output", type=str, default="", help="Optional path for saving the plot instead of displaying it.", ) parser.add_argument( "--multi_strategy", choices=["multi_output_tree", "one_output_per_tree"], default="one_output_per_tree", help="See the parameter `multi_strategy` for more info. (Experimental)", ) parser.add_argument("--device", choices=["cpu", "cuda"], default="cpu") prediction_intervals(parser.parse_args()) .. _sphx_glr_download_python_examples_prediction_intervals.py: .. only:: html .. container:: sphx-glr-footer sphx-glr-footer-example .. container:: sphx-glr-download sphx-glr-download-jupyter :download:`Download Jupyter notebook: prediction_intervals.ipynb ` .. container:: sphx-glr-download sphx-glr-download-python :download:`Download Python source code: prediction_intervals.py ` .. container:: sphx-glr-download sphx-glr-download-zip :download:`Download zipped: prediction_intervals.zip ` .. only:: html .. rst-class:: sphx-glr-signature `Gallery generated by Sphinx-Gallery `_