.. DO NOT EDIT.
.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
.. "python/examples/prediction_intervals.py"
.. LINE NUMBERS ARE GIVEN BELOW.

.. only:: html

    .. note::
        :class: sphx-glr-download-link-note

        :ref:`Go to the end <sphx_glr_download_python_examples_prediction_intervals.py>`
        to download the full example code.

.. rst-class:: sphx-glr-example-title

.. _sphx_glr_python_examples_prediction_intervals.py:


Prediction Intervals with Quantile and Expectile Regression
===========================================================

    .. versionadded:: 2.0.0

The script is inspired by this awesome example in sklearn:
https://scikit-learn.org/stable/auto_examples/ensemble/plot_gradient_boosting_quantile.html

.. note::

    The feature is only supported using the Python, R, and C packages. In addition, quantile
    crossing can happen due to limitation in the algorithm.

This example also trains ``reg:expectileerror``. Expectiles are asymmetric means,
not percentiles, but they can be used to construct tail-sensitive bands around the
conditional mean.

    .. versionadded:: 3.3.0

.. GENERATED FROM PYTHON SOURCE LINES 22-235

.. code-block:: Python


    import argparse
    from typing import Dict, Tuple

    import numpy as np
    import xgboost as xgb
    from sklearn.model_selection import train_test_split

    Data = Tuple[xgb.QuantileDMatrix, xgb.QuantileDMatrix]
    Predictions = Dict[str, np.ndarray]


    def f(x: np.ndarray) -> np.ndarray:
        """The function to predict."""
        return x * np.sin(x)


    def make_dataset(rng: np.random.RandomState) -> Tuple[np.ndarray, np.ndarray]:
        """Generate heteroscedastic data with asymmetric noise."""
        features = np.atleast_2d(rng.uniform(0, 10.0, size=1000)).T
        expected_y = f(features).ravel()

        sigma = 0.5 + features.ravel() / 10.0
        noise = rng.lognormal(sigma=sigma) - np.exp(sigma**2.0 / 2.0)
        target = expected_y + noise
        return features, target


    def train_interval_model(
        params: Dict[str, object],
        alpha_name: str,
        alpha: np.ndarray,
        data: Data,
    ) -> Tuple[xgb.Booster, Dict[str, Dict]]:
        """Train a multi-output interval model."""
        dtrain, dtest = data
        evals_result: Dict[str, Dict] = {}
        params = params.copy()
        params[alpha_name] = alpha
        booster = xgb.train(
            params,
            dtrain,
            num_boost_round=64,
            early_stopping_rounds=4,
            evals=[(dtrain, "Train"), (dtest, "Test")],
            evals_result=evals_result,
        )
        return booster, evals_result


    def squared_error_model(params: Dict[str, object], data: Data) -> xgb.Booster:
        """Train a squared-error model for comparison."""
        dtrain, dtest = data
        params = params.copy()
        params["objective"] = "reg:squarederror"
        return xgb.train(
            params,
            dtrain,
            num_boost_round=64,
            early_stopping_rounds=4,
            evals=[(dtrain, "Train"), (dtest, "Test")],
        )


    def base_params(cli_args: argparse.Namespace) -> Dict[str, object]:
        """Parameters shared by the three models in the demo."""
        return {
            "tree_method": "hist",
            "learning_rate": 0.04,
            "max_depth": 5,
            "multi_strategy": cli_args.multi_strategy,
            "device": cli_args.device,
        }


    def train_models(
        data: Data, alpha: np.ndarray, params: Dict[str, object]
    ) -> Tuple[Dict[str, xgb.Booster], Dict[str, Dict[str, Dict]]]:
        """Train quantile, expectile, and squared-error models."""
        quantile_model, quantile_evals = train_interval_model(
            {**params, "objective": "reg:quantileerror"}, "quantile_alpha", alpha, data
        )
        expectile_model, expectile_evals = train_interval_model(
            {**params, "objective": "reg:expectileerror"}, "expectile_alpha", alpha, data
        )
        models = {
            "quantile": quantile_model,
            "expectile": expectile_model,
            "mean": squared_error_model(params, data),
        }
        return models, {"quantile": quantile_evals, "expectile": expectile_evals}


    def make_predictions(models: Dict[str, xgb.Booster]) -> Tuple[np.ndarray, Predictions]:
        """Predict on a dense grid for plotting."""
        grid = np.atleast_2d(np.linspace(0, 10, 1000)).T
        predictions = {
            "quantile": models["quantile"].inplace_predict(grid),
            "expectile": models["expectile"].inplace_predict(grid),
            "mean": models["mean"].inplace_predict(grid),
        }
        return grid, predictions


    def make_matrices(
        rng: np.random.RandomState,
    ) -> Tuple[Data, Tuple[np.ndarray, np.ndarray]]:
        """Create train/test DMatrices and return held-out data for plotting."""
        features, target = make_dataset(rng)
        split = train_test_split(features, target, random_state=rng)
        train_features, test_features, train_target, test_target = split
        train = xgb.QuantileDMatrix(train_features, train_target)
        test = xgb.QuantileDMatrix(test_features, test_target, ref=train)
        return (train, test), (test_features, test_target)


    def plot_prediction_intervals(
        grid: np.ndarray,
        test_data: Tuple[np.ndarray, np.ndarray],
        predictions: Predictions,
        output: str,
    ) -> None:
        """Plot quantile interval and expectile band."""
        from matplotlib import pyplot as plt

        features, target = test_data
        fig, axes = plt.subplots(2, 1, figsize=(10, 12), sharex=True, sharey=True)

        def plot_band(
            ax: "plt.Axes", pred: np.ndarray, title: str, center_label: str
        ) -> None:
            ax.plot(grid, f(grid), "g:", linewidth=3, label=r"$f(x) = x\,\sin(x)$")
            ax.plot(
                features,
                target,
                "b.",
                markersize=5,
                alpha=0.35,
                label="Test observations",
            )
            ax.plot(grid, pred[:, 1], "r-", label=center_label)
            ax.plot(grid, predictions["mean"], "m--", label="Squared-error mean")
            ax.plot(grid, pred[:, 0], "k-")
            ax.plot(grid, pred[:, 2], "k-")
            ax.fill_between(
                grid.ravel(), pred[:, 0], pred[:, 2], alpha=0.3, label="90% band"
            )
            ax.set_title(title)
            ax.set_ylabel("$y$")
            ax.set_ylim(-10, 25)
            ax.legend(loc="upper left")

        plot_band(
            axes[0],
            predictions["quantile"],
            "Quantile regression interval",
            "Predicted median",
        )
        plot_band(
            axes[1],
            predictions["expectile"],
            "Expectile regression band",
            "Predicted 0.5 expectile",
        )
        axes[1].set_xlabel("$x$")
        fig.tight_layout()

        if output:
            fig.savefig(output, dpi=150)
        else:
            plt.show()


    def prediction_intervals(cli_args: argparse.Namespace) -> None:
        """Train quantile and expectile interval models."""
        rng = np.random.RandomState(1994)
        alpha = np.array([0.05, 0.5, 0.95])

        data, test_data = make_matrices(rng)
        models, evals = train_models(data, alpha, base_params(cli_args))
        grid, predictions = make_predictions(models)

        assert predictions["quantile"].shape == (grid.shape[0], alpha.shape[0])
        assert predictions["expectile"].shape == (grid.shape[0], alpha.shape[0])

        print("Quantile test metric:", evals["quantile"]["Test"]["quantile"][-1])
        print("Expectile test metric:", evals["expectile"]["Test"]["expectile"][-1])

        if cli_args.plot:
            plot_prediction_intervals(grid, test_data, predictions, cli_args.output)


    if __name__ == "__main__":
        parser = argparse.ArgumentParser()
        parser.add_argument(
            "--plot",
            action="store_true",
            help="Specify it to enable plotting the outputs.",
        )
        parser.add_argument(
            "--output",
            type=str,
            default="",
            help="Optional path for saving the plot instead of displaying it.",
        )
        parser.add_argument(
            "--multi_strategy",
            choices=["multi_output_tree", "one_output_per_tree"],
            default="one_output_per_tree",
            help="See the parameter `multi_strategy` for more info. (Experimental)",
        )
        parser.add_argument("--device", choices=["cpu", "cuda"], default="cpu")
        prediction_intervals(parser.parse_args())


.. _sphx_glr_download_python_examples_prediction_intervals.py:

.. only:: html

  .. container:: sphx-glr-footer sphx-glr-footer-example

    .. container:: sphx-glr-download sphx-glr-download-jupyter

      :download:`Download Jupyter notebook: prediction_intervals.ipynb <prediction_intervals.ipynb>`

    .. container:: sphx-glr-download sphx-glr-download-python

      :download:`Download Python source code: prediction_intervals.py <prediction_intervals.py>`

    .. container:: sphx-glr-download sphx-glr-download-zip

      :download:`Download zipped: prediction_intervals.zip <prediction_intervals.zip>`


.. only:: html

 .. rst-class:: sphx-glr-signature

    `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_