release_1.7.0/dev/predictor_8h_source.html

 #pragma once

 #include <xgboost/base.h>

 #include <xgboost/data.h>

 #include <xgboost/generic_parameters.h>

 #include <xgboost/host_device_vector.h>


 #include <functional>

 #include <memory>

 #include <string>

 #include <unordered_map>

 #include <utility>

 #include <vector>

 #include <mutex>


 // Forward declarations: these names are introduced here as incomplete types
 // only; no definition is provided in this header.
 namespace xgboost {
 namespace gbm {
 struct GBTreeModel;
 }  // namespace gbm

 class TreeUpdater;
 }  // namespace xgboost


 namespace xgboost {

 /* \brief Contains pointer to input matrix and associated cached predictions.
  */
 struct PredictionCacheEntry {
   // Storage for the cached prediction values.
   HostDeviceVector<bst_float> predictions;
   // Version of this cache; corresponds to the number of layers of trees.
   uint32_t version{0};
   // Weak (non-owning) pointer used to check whether the DMatrix object has
   // expired.
   std::weak_ptr<DMatrix> ref;

   PredictionCacheEntry() = default;

   /* \brief Advance the cache version.
    *
    * \param v Number of versions to add to the current one.
    */
   void Update(uint32_t v) { this->version += v; }
 };


 /* \brief A container for managed prediction caches.
  *
  *  Entries are stored in a hash map keyed by raw `DMatrix` pointer.  All member
  *  functions are only declared here; their definitions are not part of this header.
  */

 class PredictionContainer {

   // Hash map from a raw DMatrix pointer to the cache entry kept for that matrix.
   std::unordered_map<DMatrix *, PredictionCacheEntry> container_;

   // Private helper; presumably drops the entries whose DMatrix has expired (the
   // public methods below document clearing expired caches) -- confirm against the
   // definition.
   void ClearExpiredEntries();


  public:

   PredictionContainer() = default;

   /* \brief Add a new DMatrix to the cache, at the same time this function will clear out
    *        all expired caches by checking the `std::weak_ptr`.  Caching an existing
    *        DMatrix won't renew it.
    *
    *  Passing in a `shared_ptr` is critical here.  First, this shared pointer is necessary
    *  to create the `weak_ptr` inside the entry.  More importantly, the lifetime of this
    *  cache is tied to the shared pointer.
    *
    *  Another way to make a safe cache is to create a proxy to this entry, with another
    *  shared pointer defined inside, and pass this proxy around instead of the real entry.
    *  But that seems to be too messy.  In XGBoost, functions like `UpdateOneIter` will have
    *  (memory) safe access to the DMatrix as long as it's passed in as a `shared_ptr`.
    *
    * \param m shared pointer to the DMatrix that needs to be cached.
    * \param device Which device should the cache be allocated on.  Pass
    *               GenericParameter::kCpuId for CPU or positive integer for GPU id.
    *
    * \return the cache entry for passed in DMatrix, either an existing cache or newly
    *         created.
    */

   PredictionCacheEntry& Cache(std::shared_ptr<DMatrix> m, int32_t device);

   /* \brief Get a prediction cache entry.  This entry must be already allocated by `Cache`
    *        method.  Otherwise a dmlc::Error is thrown.
    *
    * \param m pointer to the DMatrix.
    * \return The prediction cache for passed in DMatrix.
    */

   PredictionCacheEntry& Entry(DMatrix* m);

   /* \brief Get a const reference to the underlying hash map.  Clear expired caches before
    *        returning.
    *
    *  The return type is spelled with `decltype(container_)`, so it always matches the type
    *  of the private map member.  The method itself is non-const.
    */

   decltype(container_) const& Container();

 };


 /* \brief Performs prediction on individual training instances or batches of instances
  *        for GBTree.
  *
  *  Abstract interface: every prediction method below is pure virtual.  Only the
  *  constructor and destructor are defined inline; `Configure`, `InitOutPredictions`
  *  and `Create` are declared here and defined elsewhere.
  */
 class Predictor {

  protected:

   // Context pointer received at construction.  Held as a raw pointer; the defaulted
   // destructor does not release it.
   Context const* ctx_;


  public:

   /* \brief Construct a predictor bound to a context.
    *
    * \param ctx Context pointer, stored in `ctx_` as given.
    */
   explicit Predictor(Context const* ctx) : ctx_{ctx} {}


   virtual ~Predictor() = default;


   /* \brief Configure and register input matrices in prediction cache.
    *
    *  The (unnamed) parameter is a list of string pairs -- presumably parameter
    *  name/value pairs; confirm against the definition.
    */
   virtual void Configure(const std::vector<std::pair<std::string, std::string>>&);


   /* \brief Initialize output prediction.
    *
    * \param info      Meta information of the dataset.
    * \param out_predt Output vector to initialize.
    * \param model     Model the predictions are initialized for.
    */
   void InitOutPredictions(const MetaInfo& info, HostDeviceVector<bst_float>* out_predt,

                           const gbm::GBTreeModel& model) const;


   /* \brief Generate batch predictions for a given feature matrix.  May use cached
    *        predictions if available.
    *
    * \param dmat       Feature matrix.
    * \param out_preds  Cache entry receiving the predictions.
    * \param model      Model to predict from.
    * \param tree_begin Beginning of the tree range used for prediction.
    * \param tree_end   End of the tree range used for prediction.
    *                   NOTE(review): the meaning of the default 0 is not visible in
    *                   this header -- confirm against the implementations.
    */
   virtual void PredictBatch(DMatrix* dmat, PredictionCacheEntry* out_preds,

                             const gbm::GBTreeModel& model, uint32_t tree_begin,

                             uint32_t tree_end = 0) const = 0;


   /* \brief Inplace prediction.
    *
    * \param p_fmat     Shared pointer to the input matrix.
    * \param model      Model to predict from.
    * \param missing    Value in the input that represents missing data (presumably;
    *                   confirm against the implementations).
    * \param out_preds  Cache entry receiving the predictions.
    * \param tree_begin Beginning of the tree range used for prediction.
    * \param tree_end   End of the tree range used for prediction.
    *
    * \return A boolean status.  NOTE(review): its exact meaning is not visible in this
    *         header -- confirm against the implementations.
    */
   virtual bool InplacePredict(std::shared_ptr<DMatrix> p_fmat, const gbm::GBTreeModel& model,

                               float missing, PredictionCacheEntry* out_preds,

                               uint32_t tree_begin = 0, uint32_t tree_end = 0) const = 0;

   /* \brief Online prediction function, predict score for one instance at a time.
    *
    * \param inst      The instance to predict.
    * \param out_preds Output vector receiving the prediction.
    * \param model     Model to predict from.
    * \param tree_end  End of the tree range used for prediction.
    */
   virtual void PredictInstance(const SparsePage::Inst& inst,

                                std::vector<bst_float>* out_preds,

                                const gbm::GBTreeModel& model,

                                unsigned tree_end = 0) const = 0;


   /* \brief Predict the leaf index of each tree; the output will be an
    *        nsample * ntree vector.
    *
    * \param dmat      Input feature matrix.
    * \param out_preds Output vector receiving the leaf indices.
    * \param model     Model to predict from.
    * \param tree_end  End of the tree range used for prediction.
    */
   virtual void PredictLeaf(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,

                            const gbm::GBTreeModel& model,

                            unsigned tree_end = 0) const = 0;


   /* \brief Compute feature contributions to individual predictions.
    *
    * \param dmat              Input feature matrix.
    * \param out_contribs      Output vector receiving the contributions.
    * \param model             Model to predict from.
    * \param tree_end          End of the tree range used for prediction.
    * \param tree_weights      Optional per-tree weights; may be null (the default).
    * \param approximate       Selects an approximate method (presumably faster and
    *                          less exact; confirm against the implementations).
    * \param condition         NOTE(review): semantics are not visible in this header.
    * \param condition_feature NOTE(review): semantics are not visible in this header.
    */
   virtual void

   PredictContribution(DMatrix *dmat, HostDeviceVector<bst_float> *out_contribs,

                       const gbm::GBTreeModel &model, unsigned tree_end = 0,

                       std::vector<bst_float> const *tree_weights = nullptr,

                       bool approximate = false, int condition = 0,

                       unsigned condition_feature = 0) const = 0;


   /* \brief Interaction counterpart of `PredictContribution` (presumably contributions
    *        of feature interactions; confirm against the implementations).
    *
    * \param dmat         Input feature matrix.
    * \param out_contribs Output vector receiving the contributions.
    * \param model        Model to predict from.
    * \param tree_end     End of the tree range used for prediction.
    * \param tree_weights Optional per-tree weights; may be null (the default).
    * \param approximate  Selects an approximate method.
    */
   virtual void PredictInteractionContributions(

       DMatrix *dmat, HostDeviceVector<bst_float> *out_contribs,

       const gbm::GBTreeModel &model, unsigned tree_end = 0,

       std::vector<bst_float> const *tree_weights = nullptr,

       bool approximate = false) const = 0;


   /* \brief Creates a new Predictor*.
    *
    * \param name          Name of the predictor to create.
    * \param generic_param Parameter/context pointer made available to the predictor.
    *
    * \return Raw pointer to the created predictor.  NOTE(review): ownership presumably
    *         passes to the caller -- confirm against the definition.
    */
   static Predictor* Create(

       std::string const& name, GenericParameter const* generic_param);

 };


 /* \brief Registry entry for predictor.
  *
  *  The registered function type takes a `GenericParameter const*` and returns a raw
  *  `Predictor*`.  The struct adds no members of its own to the dmlc base.
  */
 struct PredictorReg

     : public dmlc::FunctionRegEntryBase<

   PredictorReg, std::function<Predictor*(GenericParameter const*)>> {};


 /* \brief Register a predictor under a name.
  *
  *  Expands to the declaration of a static `::xgboost::PredictorReg&` named
  *  `__make_PredictorReg_<UniqueId>__`, initialized with the entry returned by
  *  `::dmlc::Registry<::xgboost::PredictorReg>::Get()->__REGISTER__(Name)`.
  *
  *  All names in the expansion are fully qualified, so the macro does not depend
  *  on the namespace of the use site.  The expansion has no trailing semicolon;
  *  the use site supplies the terminator.
  *
  *  The definition must stay one contiguous run of lines: each `\` has to be the
  *  last character on its line and no blank line may follow it, otherwise the
  *  macro ends early and the rest is parsed as ordinary code.
  *
  * \param UniqueId Identifier token pasted into the variable name; must be unique
  *                 within the scope where the macro is used.
  * \param Name     Name passed to `__REGISTER__` for the new entry.
  */
 #define XGBOOST_REGISTER_PREDICTOR(UniqueId, Name)      \
   static DMLC_ATTRIBUTE_UNUSED ::xgboost::PredictorReg& \
       __make_##PredictorReg##_##UniqueId##__ =          \
           ::dmlc::Registry<::xgboost::PredictorReg>::Get()->__REGISTER__(Name)

 }  // namespace xgboost

base.h
defines configuration macros of xgboost.

xgboost::DMatrix
Internal data structure used by XGBoost during training.
Definition: data.h:490

xgboost::HostDeviceVector< bst_float >

xgboost::MetaInfo
Meta information about dataset, always sits in memory.
Definition: data.h:46

xgboost::PredictionContainer
Definition: predictor.h:55

xgboost::PredictionContainer::PredictionContainer
PredictionContainer()=default

xgboost::PredictionContainer::Entry
PredictionCacheEntry & Entry(DMatrix *m)

xgboost::PredictionContainer::Cache
PredictionCacheEntry & Cache(std::shared_ptr< DMatrix > m, int32_t device)

xgboost::PredictionContainer::Container
decltype(container_) const  & Container()

xgboost::Predictor
Performs prediction on individual training instances or batches of instances for GBTree....
Definition: predictor.h:103

xgboost::Predictor::PredictInteractionContributions
virtual void PredictInteractionContributions(DMatrix *dmat, HostDeviceVector< bst_float > *out_contribs, const gbm::GBTreeModel &model, unsigned tree_end=0, std::vector< bst_float > const *tree_weights=nullptr, bool approximate=false) const =0

xgboost::Predictor::Create
static Predictor * Create(std::string const &name, GenericParameter const *generic_param)
Creates a new Predictor*.

xgboost::Predictor::InitOutPredictions
void InitOutPredictions(const MetaInfo &info, HostDeviceVector< bst_float > *out_predt, const gbm::GBTreeModel &model) const
Initialize output prediction.

xgboost::Predictor::PredictContribution
virtual void PredictContribution(DMatrix *dmat, HostDeviceVector< bst_float > *out_contribs, const gbm::GBTreeModel &model, unsigned tree_end=0, std::vector< bst_float > const *tree_weights=nullptr, bool approximate=false, int condition=0, unsigned condition_feature=0) const =0
feature contributions to individual predictions; the output will be a vector of length (nfeats + 1) *...

xgboost::Predictor::Configure
virtual void Configure(const std::vector< std::pair< std::string, std::string >> &)
Configure and register input matrices in prediction cache.

xgboost::Predictor::Predictor
Predictor(Context const *ctx)
Definition: predictor.h:108

xgboost::Predictor::PredictLeaf
virtual void PredictLeaf(DMatrix *dmat, HostDeviceVector< bst_float > *out_preds, const gbm::GBTreeModel &model, unsigned tree_end=0) const =0
predict the leaf index of each tree, the output will be nsample * ntree vector this is only valid in ...

xgboost::Predictor::ctx_
Context const  * ctx_
Definition: predictor.h:105

xgboost::Predictor::PredictInstance
virtual void PredictInstance(const SparsePage::Inst &inst, std::vector< bst_float > *out_preds, const gbm::GBTreeModel &model, unsigned tree_end=0) const =0
online prediction function, predict score for one instance at a time NOTE: use the batch prediction i...

xgboost::Predictor::~Predictor
virtual ~Predictor()=default

xgboost::Predictor::InplacePredict
virtual bool InplacePredict(std::shared_ptr< DMatrix > p_fmat, const gbm::GBTreeModel &model, float missing, PredictionCacheEntry *out_preds, uint32_t tree_begin=0, uint32_t tree_end=0) const =0
Inplace prediction.

xgboost::Predictor::PredictBatch
virtual void PredictBatch(DMatrix *dmat, PredictionCacheEntry *out_preds, const gbm::GBTreeModel &model, uint32_t tree_begin, uint32_t tree_end=0) const =0
Generate batch predictions for a given feature matrix. May use cached predictions if available instea...

xgboost::common::Span
span class implementation, based on ISO C++20 span<T>. The interface should be the same.
Definition: span.h:423

data.h
The input data structure of xgboost.

generic_parameters.h

host_device_vector.h
A device-and-host vector abstraction layer.

xgboost
namespace of xgboost
Definition: base.h:110

xgboost::GenericParameter
Definition: generic_parameters.h:15

xgboost::PredictionCacheEntry
Contains pointer to input matrix and associated cached predictions.
Definition: predictor.h:35

xgboost::PredictionCacheEntry::version
uint32_t version
Definition: predictor.h:39

xgboost::PredictionCacheEntry::predictions
HostDeviceVector< bst_float > predictions
Definition: predictor.h:37

xgboost::PredictionCacheEntry::Update
void Update(uint32_t v)
Definition: predictor.h:48

xgboost::PredictionCacheEntry::ref
std::weak_ptr< DMatrix > ref
Definition: predictor.h:41

xgboost::PredictionCacheEntry::PredictionCacheEntry
PredictionCacheEntry()=default

xgboost::PredictorReg
Registry entry for predictor.
Definition: predictor.h:233