xgboost
predictor.h
Go to the documentation of this file.
1 
7 #pragma once
8 #include <xgboost/base.h>
9 #include <xgboost/data.h>
12 
13 #include <functional>
14 #include <memory>
15 #include <string>
16 #include <unordered_map>
17 #include <utility>
18 #include <vector>
19 #include <mutex>
20 
21 // Forward declarations
22 namespace xgboost {
23 class TreeUpdater;
24 namespace gbm {
25 struct GBTreeModel;
26 } // namespace gbm
27 }
28 
29 namespace xgboost {
36  // A storage for caching prediction values
38  // The version of current cache, corresponding number of layers of trees
39  uint32_t version { 0 };
40  // A weak pointer for checking whether the DMatrix object has expired.
41  std::weak_ptr< DMatrix > ref;
42 
43  PredictionCacheEntry() = default;
44  /* \brief Advance the cache version by the given number of versions.
45  *
46  * \param v Number of versions to add to `version`.
47  */
48  void Update(uint32_t v) {
49  version += v;
50  }
51 };
52 
53 /* \brief A container for managed prediction caches.
54  */
56  std::unordered_map<DMatrix *, PredictionCacheEntry> container_;
57  void ClearExpiredEntries();
58 
59  public:
60  PredictionContainer() = default;
61  /* \brief Add a new DMatrix to the cache. At the same time, this function clears out
62  * all expired caches by checking the `std::weak_ptr`. Caching an existing
63  * DMatrix won't renew it.
64  *
65  * Passing in a `shared_ptr` is critical here. First, this shared pointer is necessary
66  * to create a `weak_ptr` inside the entry. More importantly, the lifetime of this
67  * cache is tied to the shared pointer.
68  *
69  * Another way to make a safe cache is to create a proxy to this entry, with another shared
70  * pointer defined inside, and pass this proxy around instead of the real entry. But that
71  * seems to be too messy. In XGBoost, functions like `UpdateOneIter` will have
72  * (memory) safe access to the DMatrix as long as it's passed in as a `shared_ptr`.
73  *
74  * \param m shared pointer to the DMatrix that needs to be cached.
75  * \param device Which device should the cache be allocated on. Pass
76  * GenericParameter::kCpuId for CPU or positive integer for GPU id.
77  *
78  * \return the cache entry for passed in DMatrix, either an existing cache or newly
79  * created.
80  */
81  PredictionCacheEntry& Cache(std::shared_ptr<DMatrix> m, int32_t device);
82  /* \brief Get a prediction cache entry. This entry must already be allocated by the `Cache`
83  * method; otherwise a dmlc::Error is thrown.
84  *
85  * \param m pointer to the DMatrix.
86  * \return The prediction cache for passed in DMatrix.
87  */
89  /* \brief Get a const reference to the underlying hash map. Clear expired caches before
90  * returning.
91  */
92  decltype(container_) const& Container();
93 };
94 
103 class Predictor {
104  protected:
105  /*
106  * \brief Runtime parameters.
107  */
109 
110  public:
111  explicit Predictor(GenericParameter const* ctx) : ctx_{ctx} {}
112 
113  virtual ~Predictor() = default;
114 
120  virtual void Configure(const std::vector<std::pair<std::string, std::string>>&);
121 
129  void InitOutPredictions(const MetaInfo& info, HostDeviceVector<bst_float>* out_predt,
130  const gbm::GBTreeModel& model) const;
131 
142  virtual void PredictBatch(DMatrix* dmat, PredictionCacheEntry* out_preds,
143  const gbm::GBTreeModel& model, uint32_t tree_begin,
144  uint32_t tree_end = 0) const = 0;
145 
157  virtual bool InplacePredict(dmlc::any const &x, std::shared_ptr<DMatrix> p_m,
158  const gbm::GBTreeModel &model, float missing,
159  PredictionCacheEntry *out_preds,
160  uint32_t tree_begin = 0,
161  uint32_t tree_end = 0) const = 0;
174  virtual void PredictInstance(const SparsePage::Inst& inst,
175  std::vector<bst_float>* out_preds,
176  const gbm::GBTreeModel& model,
177  unsigned tree_end = 0) const = 0;
178 
189  virtual void PredictLeaf(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,
190  const gbm::GBTreeModel& model,
191  unsigned tree_end = 0) const = 0;
192 
208  virtual void
209  PredictContribution(DMatrix *dmat, HostDeviceVector<bst_float> *out_contribs,
210  const gbm::GBTreeModel &model, unsigned tree_end = 0,
211  std::vector<bst_float> const *tree_weights = nullptr,
212  bool approximate = false, int condition = 0,
213  unsigned condition_feature = 0) const = 0;
214 
215  virtual void PredictInteractionContributions(
216  DMatrix *dmat, HostDeviceVector<bst_float> *out_contribs,
217  const gbm::GBTreeModel &model, unsigned tree_end = 0,
218  std::vector<bst_float> const *tree_weights = nullptr,
219  bool approximate = false) const = 0;
220 
227  static Predictor* Create(
228  std::string const& name, GenericParameter const* generic_param);
229 };
230 
235  : public dmlc::FunctionRegEntryBase<
236  PredictorReg, std::function<Predictor*(GenericParameter const*)>> {};
237 
238 #define XGBOOST_REGISTER_PREDICTOR(UniqueId, Name) \
239  static DMLC_ATTRIBUTE_UNUSED ::xgboost::PredictorReg& \
240  __make_##PredictorReg##_##UniqueId##__ = \
241  ::dmlc::Registry<::xgboost::PredictorReg>::Get()->__REGISTER__(Name)
242 } // namespace xgboost
xgboost::Predictor::PredictBatch
virtual void PredictBatch(DMatrix *dmat, PredictionCacheEntry *out_preds, const gbm::GBTreeModel &model, uint32_t tree_begin, uint32_t tree_end=0) const =0
Generate batch predictions for a given feature matrix. May use cached predictions if available instea...
xgboost::Predictor::~Predictor
virtual ~Predictor()=default
xgboost::Predictor::Configure
virtual void Configure(const std::vector< std::pair< std::string, std::string >> &)
Configure and register input matrices in prediction cache.
xgboost::PredictionCacheEntry
Contains pointer to input matrix and associated cached predictions.
Definition: predictor.h:35
xgboost::PredictionContainer::PredictionContainer
PredictionContainer()=default
xgboost::Predictor::PredictContribution
virtual void PredictContribution(DMatrix *dmat, HostDeviceVector< bst_float > *out_contribs, const gbm::GBTreeModel &model, unsigned tree_end=0, std::vector< bst_float > const *tree_weights=nullptr, bool approximate=false, int condition=0, unsigned condition_feature=0) const =0
feature contributions to individual predictions; the output will be a vector of length (nfeats + 1) *...
xgboost::GenericParameter
Definition: generic_parameters.h:15
xgboost::PredictionContainer::Cache
PredictionCacheEntry & Cache(std::shared_ptr< DMatrix > m, int32_t device)
xgboost::HostDeviceVector< bst_float >
xgboost::PredictionCacheEntry::Update
void Update(uint32_t v)
Definition: predictor.h:48
host_device_vector.h
A device-and-host vector abstraction layer.
base.h
defines configuration macros of xgboost.
xgboost::PredictionCacheEntry::version
uint32_t version
Definition: predictor.h:39
xgboost::SparsePage::Inst
common::Span< Entry const > Inst
an instance of sparse vector in the batch
Definition: data.h:281
xgboost::DMatrix
Internal data structure used by XGBoost during training.
Definition: data.h:475
xgboost::PredictionContainer
Definition: predictor.h:55
xgboost::Predictor::InitOutPredictions
void InitOutPredictions(const MetaInfo &info, HostDeviceVector< bst_float > *out_predt, const gbm::GBTreeModel &model) const
Initialize output prediction.
xgboost::PredictionContainer::Entry
PredictionCacheEntry & Entry(DMatrix *m)
xgboost::PredictionCacheEntry::predictions
HostDeviceVector< bst_float > predictions
Definition: predictor.h:37
xgboost::Predictor::PredictInstance
virtual void PredictInstance(const SparsePage::Inst &inst, std::vector< bst_float > *out_preds, const gbm::GBTreeModel &model, unsigned tree_end=0) const =0
online prediction function, predict score for one instance at a time NOTE: use the batch prediction i...
xgboost::PredictionContainer::Container
decltype(container_) const & Container()
xgboost::PredictionCacheEntry::PredictionCacheEntry
PredictionCacheEntry()=default
xgboost::Predictor::PredictLeaf
virtual void PredictLeaf(DMatrix *dmat, HostDeviceVector< bst_float > *out_preds, const gbm::GBTreeModel &model, unsigned tree_end=0) const =0
predict the leaf index of each tree, the output will be nsample * ntree vector this is only valid in ...
xgboost::Predictor::ctx_
GenericParameter const * ctx_
Definition: predictor.h:108
generic_parameters.h
data.h
The input data structure of xgboost.
xgboost::PredictorReg
Registry entry for predictor.
Definition: predictor.h:234
xgboost::Predictor::Create
static Predictor * Create(std::string const &name, GenericParameter const *generic_param)
Creates a new Predictor*.
xgboost::Predictor
Performs prediction on individual training instances or batches of instances for GBTree....
Definition: predictor.h:103
xgboost::Predictor::PredictInteractionContributions
virtual void PredictInteractionContributions(DMatrix *dmat, HostDeviceVector< bst_float > *out_contribs, const gbm::GBTreeModel &model, unsigned tree_end=0, std::vector< bst_float > const *tree_weights=nullptr, bool approximate=false) const =0
xgboost::PredictionCacheEntry::ref
std::weak_ptr< DMatrix > ref
Definition: predictor.h:41
xgboost::Predictor::InplacePredict
virtual bool InplacePredict(dmlc::any const &x, std::shared_ptr< DMatrix > p_m, const gbm::GBTreeModel &model, float missing, PredictionCacheEntry *out_preds, uint32_t tree_begin=0, uint32_t tree_end=0) const =0
Inplace prediction.
xgboost::Predictor::Predictor
Predictor(GenericParameter const *ctx)
Definition: predictor.h:111
xgboost
namespace of xgboost
Definition: base.h:110