xgboost
learner.h
Go to the documentation of this file.
1 
8 #ifndef XGBOOST_LEARNER_H_
9 #define XGBOOST_LEARNER_H_
10 
11 #include <xgboost/base.h>
12 #include <xgboost/feature_map.h>
13 #include <xgboost/generic_parameters.h> // Context
15 #include <xgboost/model.h>
16 #include <xgboost/predictor.h>
17 #include <xgboost/task.h>
18 
19 #include <map>
20 #include <memory>
21 #include <string>
22 #include <utility>
23 #include <vector>
24 
25 namespace xgboost {
26 
27 class Metric;
28 class GradientBooster;
29 class ObjFunction;
30 class DMatrix;
31 class Json;
32 
33 enum class PredictionType : std::uint8_t { // NOLINT
34  kValue = 0,
35  kMargin = 1,
36  kContribution = 2,
38  kInteraction = 4,
40  kLeaf = 6
41 };
42 
46  std::string ret_str;
48  std::vector<char> ret_char_vec;
50  std::vector<std::string> ret_vec_str;
52  std::vector<const char *> ret_vec_charp;
54  std::vector<bst_float> ret_vec_float;
56  std::vector<GradientPair> tmp_gpair;
60  std::vector<bst_ulong> prediction_shape;
61 };
62 
79 class Learner : public Model, public Configurable, public dmlc::Serializable {
80  public:
82  ~Learner() override;
86  virtual void Configure() = 0;
93  virtual void UpdateOneIter(int iter, std::shared_ptr<DMatrix> train) = 0;
101  virtual void BoostOneIter(int iter,
102  std::shared_ptr<DMatrix> train,
103  HostDeviceVector<GradientPair>* in_gpair) = 0;
111  virtual std::string EvalOneIter(int iter,
112  const std::vector<std::shared_ptr<DMatrix>>& data_sets,
113  const std::vector<std::string>& data_names) = 0;
127  virtual void Predict(std::shared_ptr<DMatrix> data,
128  bool output_margin,
129  HostDeviceVector<bst_float> *out_preds,
130  unsigned layer_begin,
131  unsigned layer_end,
132  bool training = false,
133  bool pred_leaf = false,
134  bool pred_contribs = false,
135  bool approx_contribs = false,
136  bool pred_interactions = false) = 0;
137 
148  virtual void InplacePredict(std::shared_ptr<DMatrix> p_m, PredictionType type, float missing,
149  HostDeviceVector<bst_float>** out_preds, uint32_t layer_begin,
150  uint32_t layer_end) = 0;
151 
155  virtual void CalcFeatureScore(std::string const& importance_type,
157  std::vector<bst_feature_t>* features,
158  std::vector<float>* scores) = 0;
159 
160  /*
161  * \brief Get number of boosted rounds from gradient booster.
162  */
163  virtual int32_t BoostedRounds() const = 0;
164  virtual uint32_t Groups() const = 0;
165 
166  void LoadModel(Json const& in) override = 0;
167  void SaveModel(Json* out) const override = 0;
168 
169  virtual void LoadModel(dmlc::Stream* fi) = 0;
170  virtual void SaveModel(dmlc::Stream* fo) const = 0;
171 
177  virtual void SetParams(Args const& args) = 0;
186  virtual void SetParam(const std::string& key, const std::string& value) = 0;
187 
192  virtual uint32_t GetNumFeature() const = 0;
193 
202  virtual void SetAttr(const std::string& key, const std::string& value) = 0;
210  virtual bool GetAttr(const std::string& key, std::string* out) const = 0;
216  virtual bool DelAttr(const std::string& key) = 0;
221  virtual std::vector<std::string> GetAttrNames() const = 0;
226  virtual void SetFeatureNames(std::vector<std::string> const& fn) = 0;
231  virtual void GetFeatureNames(std::vector<std::string>* fn) const = 0;
236  virtual void SetFeatureTypes(std::vector<std::string> const& ft) = 0;
241  virtual void GetFeatureTypes(std::vector<std::string>* ft) const = 0;
242 
253  virtual Learner *Slice(int32_t begin_layer, int32_t end_layer, int32_t step,
254  bool *out_of_bound) = 0;
262  virtual std::vector<std::string> DumpModel(const FeatureMap& fmap,
263  bool with_stats,
264  std::string format) = 0;
265 
272  static Learner* Create(const std::vector<std::shared_ptr<DMatrix> >& cache_data);
276  virtual Context const* Ctx() const = 0;
281  virtual const std::map<std::string, std::string>& GetConfigurationArguments() const = 0;
282 
283  protected:
285  std::unique_ptr<ObjFunction> obj_;
287  std::unique_ptr<GradientBooster> gbm_;
289  std::vector<std::unique_ptr<Metric> > metrics_;
292 };
293 
294 struct LearnerModelParamLegacy;
295 
296 /*
297  * \brief Basic Model Parameters, used to describe the booster.
298  */
300  private:
305  linalg::Tensor<float, 1> base_score_;
306 
307  public:
308  /* \brief number of features */
309  uint32_t num_feature { 0 };
310  /* \brief number of classes, if it is multi-class classification */
311  uint32_t num_output_group { 0 };
312  /* \brief Current task, determined by objective. */
314 
315  LearnerModelParam() = default;
316  // As the old `LearnerModelParamLegacy` is still used by binary IO, we keep
317  // this one as an immutable copy.
318  LearnerModelParam(Context const* ctx, LearnerModelParamLegacy const& user_param,
319  linalg::Tensor<float, 1> base_margin, ObjInfo t);
320  LearnerModelParam(LearnerModelParamLegacy const& user_param, ObjInfo t);
322  uint32_t n_groups)
323  : base_score_{std::move(base_margin)}, num_feature{n_features}, num_output_group{n_groups} {}
324 
327 
328  void Copy(LearnerModelParam const& that);
329 
330  /* \brief Whether this parameter is initialized with LearnerModelParamLegacy. */
331  bool Initialized() const { return num_feature != 0 && num_output_group != 0; }
332 };
333 
334 } // namespace xgboost
335 #endif // XGBOOST_LEARNER_H_
defines configuration macros of xgboost.
Feature map data structure to help text model dump. TODO(tqchen): consider making it even more lightweight...
Definition: feature_map.h:22
Definition: host_device_vector.h:86
Data structure representing JSON format.
Definition: json.h:356
Learner class that does training and prediction. This is the user-facing module of xgboost training....
Definition: learner.h:79
virtual std::vector< std::string > GetAttrNames() const =0
Get a vector of attribute names from the booster.
virtual void SetParam(const std::string &key, const std::string &value)=0
Set parameter for booster.
virtual void LoadModel(dmlc::Stream *fi)=0
std::vector< std::unique_ptr< Metric > > metrics_
The evaluation metrics used to evaluate the model.
Definition: learner.h:289
virtual void CalcFeatureScore(std::string const &importance_type, common::Span< int32_t const > trees, std::vector< bst_feature_t > *features, std::vector< float > *scores)=0
Calculate feature score. See doc in C API for outputs.
virtual std::vector< std::string > DumpModel(const FeatureMap &fmap, bool with_stats, std::string format)=0
dump the model in the requested format
virtual std::string EvalOneIter(int iter, const std::vector< std::shared_ptr< DMatrix >> &data_sets, const std::vector< std::string > &data_names)=0
evaluate the model for specific iteration using the configured metrics.
~Learner() override
virtual destructor
virtual XGBAPIThreadLocalEntry & GetThreadLocal() const =0
virtual const std::map< std::string, std::string > & GetConfigurationArguments() const =0
Get configuration arguments currently stored by the learner.
virtual void SetFeatureNames(std::vector< std::string > const &fn)=0
Set the feature names for current booster.
virtual void SaveModel(dmlc::Stream *fo) const =0
virtual Context const * Ctx() const =0
Return the context object of this Booster.
virtual void BoostOneIter(int iter, std::shared_ptr< DMatrix > train, HostDeviceVector< GradientPair > *in_gpair)=0
Do customized gradient boosting with in_gpair. in_gpair can be mutated after this call.
virtual void Configure()=0
Configure Learner based on set parameters.
virtual int32_t BoostedRounds() const =0
virtual bool DelAttr(const std::string &key)=0
Delete an attribute from the booster.
virtual bool GetAttr(const std::string &key, std::string *out) const =0
Get attribute from the booster. The property will be saved along with the booster.
virtual void SetParams(Args const &args)=0
Set multiple parameters at once.
virtual void UpdateOneIter(int iter, std::shared_ptr< DMatrix > train)=0
Update the model for one iteration with the specified objective function.
virtual void Predict(std::shared_ptr< DMatrix > data, bool output_margin, HostDeviceVector< bst_float > *out_preds, unsigned layer_begin, unsigned layer_end, bool training=false, bool pred_leaf=false, bool pred_contribs=false, bool approx_contribs=false, bool pred_interactions=false)=0
get prediction given the model.
virtual Learner * Slice(int32_t begin_layer, int32_t end_layer, int32_t step, bool *out_of_bound)=0
Slice the model.
virtual void SetAttr(const std::string &key, const std::string &value)=0
Set additional attribute to the Booster.
std::unique_ptr< ObjFunction > obj_
objective function
Definition: learner.h:285
virtual void InplacePredict(std::shared_ptr< DMatrix > p_m, PredictionType type, float missing, HostDeviceVector< bst_float > **out_preds, uint32_t layer_begin, uint32_t layer_end)=0
Inplace prediction.
std::unique_ptr< GradientBooster > gbm_
The gradient booster used by the model.
Definition: learner.h:287
virtual void GetFeatureNames(std::vector< std::string > *fn) const =0
Get the feature names for current booster.
void LoadModel(Json const &in) override=0
load the model from a JSON object
virtual void SetFeatureTypes(std::vector< std::string > const &ft)=0
Set the feature types for current booster.
void SaveModel(Json *out) const override=0
saves the model config to a JSON object
static Learner * Create(const std::vector< std::shared_ptr< DMatrix > > &cache_data)
Create a new instance of learner.
Context ctx_
Training parameter.
Definition: learner.h:291
virtual uint32_t Groups() const =0
virtual uint32_t GetNumFeature() const =0
Get the number of features of the booster.
virtual void GetFeatureTypes(std::vector< std::string > *ft) const =0
Get the feature types for current booster.
span class implementation, based on ISO C++20 span<T>. The interface should be the same.
Definition: span.h:423
A tensor view with static type and dimension. It implements indexing and slicing.
Definition: linalg.h:262
Feature map data structure to help visualization and model dump.
A device-and-host vector abstraction layer.
Defines the abstract interface for different components in XGBoost.
Definition: intrusive_ptr.h:207
namespace of xgboost
Definition: base.h:110
std::vector< std::pair< std::string, std::string > > Args
Definition: base.h:318
uint32_t bst_feature_t
Type for data column (feature) index.
Definition: base.h:123
PredictionType
Definition: learner.h:33
Interface of predictor, performs predictions for a gradient booster.
Definition: model.h:31
Definition: generic_parameters.h:15
Definition: learner.h:299
uint32_t num_feature
Definition: learner.h:309
LearnerModelParam(Context const *ctx, LearnerModelParamLegacy const &user_param, linalg::Tensor< float, 1 > base_margin, ObjInfo t)
linalg::TensorView< float const, 1 > BaseScore(int32_t device) const
bool Initialized() const
Definition: learner.h:331
LearnerModelParam(LearnerModelParamLegacy const &user_param, ObjInfo t)
LearnerModelParam(bst_feature_t n_features, linalg::Tensor< float, 1 > base_margin, uint32_t n_groups)
Definition: learner.h:321
uint32_t num_output_group
Definition: learner.h:311
void Copy(LearnerModelParam const &that)
linalg::TensorView< float const, 1 > BaseScore(Context const *ctx) const
ObjInfo task
Definition: learner.h:313
Definition: model.h:17
A struct returned by objective, which determines task at hand. The struct is not used by any algorith...
Definition: task.h:24
@ kRegression
Definition: task.h:27
Contains pointer to input matrix and associated cached predictions.
Definition: predictor.h:35
entry to easily hold returned information
Definition: learner.h:44
std::vector< bst_ulong > prediction_shape
Temp variable for returning prediction shape.
Definition: learner.h:60
std::vector< GradientPair > tmp_gpair
temp variable of gradient pairs.
Definition: learner.h:56
PredictionCacheEntry prediction_entry
Temp variable for returning prediction result.
Definition: learner.h:58
std::vector< const char * > ret_vec_charp
result holder for returning string pointers
Definition: learner.h:52
std::string ret_str
result holder for returning string
Definition: learner.h:46
std::vector< char > ret_char_vec
result holder for returning raw buffer
Definition: learner.h:48
std::vector< std::string > ret_vec_str
result holder for returning strings
Definition: learner.h:50
std::vector< bst_float > ret_vec_float
returning float vector.
Definition: learner.h:54