xgboost
|
Defines the regression tree, the most common tree model. This is the data structure used in xgboost's major tree models. More...
#include <tree_model.h>
Classes | |
struct | CategoricalSplitMatrix |
struct | FVec |
Dense feature vector that can be consumed by RegTree and can be constructed from a sparse feature vector. More... | |
class | Node |
tree node More... | |
struct | Segment |
Public Types | |
using | SplitCondT = bst_float |
Public Member Functions | |
void | ChangeToLeaf (int rid, bst_float value) |
change a non leaf node to a leaf node, delete its children More... | |
void | CollapseToLeaf (int rid, bst_float value) |
collapse a non leaf node to a leaf node, delete its children More... | |
RegTree () | |
constructor More... | |
Node & | operator[] (int nid) |
get node given nid More... | |
const Node & | operator[] (int nid) const |
get node given nid More... | |
const std::vector< Node > & | GetNodes () const |
get const reference to nodes More... | |
const std::vector< RTreeNodeStat > & | GetStats () const |
get const reference to stats More... | |
RTreeNodeStat & | Stat (int nid) |
get node statistics given nid More... | |
const RTreeNodeStat & | Stat (int nid) const |
get node statistics given nid More... | |
void | Load (dmlc::Stream *fi) |
load model from stream More... | |
void | Save (dmlc::Stream *fo) const |
save model to stream More... | |
void | LoadModel (Json const &in) override |
load the model from a JSON object More... | |
void | SaveModel (Json *out) const override |
saves the model config to a JSON object More... | |
bool | operator== (const RegTree &b) const |
template<typename Func > | |
void | WalkTree (Func func) const |
bool | Equal (const RegTree &b) const |
Compares whether 2 trees are equal from a user's perspective. The equality compares only non-deleted nodes. More... | |
void | ExpandNode (bst_node_t nid, unsigned split_index, bst_float split_value, bool default_left, bst_float base_weight, bst_float left_leaf_weight, bst_float right_leaf_weight, bst_float loss_change, float sum_hess, float left_sum, float right_sum, bst_node_t leaf_right_child=kInvalidNodeId) |
Expands a leaf node into two additional leaf nodes. More... | |
void | ExpandCategorical (bst_node_t nid, unsigned split_index, common::Span< uint32_t > split_cat, bool default_left, bst_float base_weight, bst_float left_leaf_weight, bst_float right_leaf_weight, bst_float loss_change, float sum_hess, float left_sum, float right_sum) |
Expands a leaf node with categories. More... | |
bool | HasCategoricalSplit () const |
int | GetDepth (int nid) const |
get current depth More... | |
int | MaxDepth (int nid) const |
get maximum depth More... | |
int | MaxDepth () |
get maximum depth More... | |
int | NumExtraNodes () const |
number of extra nodes besides the root More... | |
bst_node_t | GetNumLeaves () const |
bst_node_t | GetNumSplitNodes () const |
void | CalculateContributions (const RegTree::FVec &feat, std::vector< float > *mean_values, bst_float *out_contribs, int condition=0, unsigned condition_feature=0) const |
calculate the feature contributions (https://arxiv.org/abs/1706.06060) for the tree More... | |
void | TreeShap (const RegTree::FVec &feat, bst_float *phi, bst_node_t node_index, unsigned unique_depth, PathElement *parent_unique_path, bst_float parent_zero_fraction, bst_float parent_one_fraction, int parent_feature_index, int condition, unsigned condition_feature, bst_float condition_fraction) const |
Recursive function that computes the feature attributions for a single tree. More... | |
void | CalculateContributionsApprox (const RegTree::FVec &feat, std::vector< float > *mean_values, bst_float *out_contribs) const |
calculate the approximate feature contributions for the given root More... | |
std::string | DumpModel (const FeatureMap &fmap, bool with_stats, std::string format) const |
dump the model in the requested format as a text string More... | |
FeatureType | NodeSplitType (bst_node_t nidx) const |
Get split type for a node. More... | |
const std::vector< FeatureType > & | GetSplitTypes () const |
Get split types for all nodes. More... | |
common::Span< uint32_t const > | GetSplitCategories () const |
auto const & | GetSplitCategoriesPtr () const |
CategoricalSplitMatrix | GetCategoriesMatrix () const |
Public Member Functions inherited from xgboost::Model | |
virtual | ~Model ()=default |
Public Attributes | |
TreeParam | param |
model parameter More... | |
Static Public Attributes | |
static constexpr bst_node_t | kInvalidNodeId {-1} |
static constexpr uint32_t | kDeletedNodeMarker = std::numeric_limits<uint32_t>::max() |
static constexpr bst_node_t | kRoot { 0 } |
Defines the regression tree, the most common tree model. This is the data structure used in xgboost's major tree models.
|
inline |
constructor
void xgboost::RegTree::CalculateContributions | ( | const RegTree::FVec & | feat, |
std::vector< float > * | mean_values, | ||
bst_float * | out_contribs, | ||
int | condition = 0 , |
||
unsigned | condition_feature = 0 |
||
) | const |
calculate the feature contributions (https://arxiv.org/abs/1706.06060) for the tree
feat | dense feature vector, if the feature is missing the field is set to NaN |
out_contribs | output vector to hold the contributions |
condition | fix one feature to either off (-1) or on (1), or leave it not fixed (0, the default) |
condition_feature | the index of the feature to fix |
void xgboost::RegTree::CalculateContributionsApprox | ( | const RegTree::FVec & | feat, |
std::vector< float > * | mean_values, | ||
bst_float * | out_contribs | ||
) | const |
calculate the approximate feature contributions for the given root
feat | dense feature vector, if the feature is missing the field is set to NaN |
out_contribs | output vector to hold the contributions |
|
inline |
change a non leaf node to a leaf node, delete its children
rid | node id of the node |
value | new leaf value |
|
inline |
collapse a non leaf node to a leaf node, delete its children
rid | node id of the node |
value | new leaf value |
std::string xgboost::RegTree::DumpModel | ( | const FeatureMap & | fmap, |
bool | with_stats, | ||
std::string | format | ||
) | const |
dump the model in the requested format as a text string
fmap | feature map that may help give interpretations of feature |
with_stats | whether dump out statistics as well |
format | the format to dump the model in |
bool xgboost::RegTree::Equal | ( | const RegTree & | b | ) | const |
Compares whether 2 trees are equal from a user's perspective. The equality compares only non-deleted nodes.
b | The other tree. |
void xgboost::RegTree::ExpandCategorical | ( | bst_node_t | nid, |
unsigned | split_index, | ||
common::Span< uint32_t > | split_cat, | ||
bool | default_left, | ||
bst_float | base_weight, | ||
bst_float | left_leaf_weight, | ||
bst_float | right_leaf_weight, | ||
bst_float | loss_change, | ||
float | sum_hess, | ||
float | left_sum, | ||
float | right_sum | ||
) |
Expands a leaf node with categories.
nid | The node index to expand. |
split_index | Feature index of the split. |
split_cat | The bitset containing categories |
default_left | True to default left. |
base_weight | The base weight, before learning rate. |
left_leaf_weight | The left leaf weight for prediction, modified by learning rate. |
right_leaf_weight | The right leaf weight for prediction, modified by learning rate. |
loss_change | The loss change. |
sum_hess | The sum of Hessians of the node. |
left_sum | The sum of Hessians of the left leaf. |
right_sum | The sum of Hessians of the right leaf. |
void xgboost::RegTree::ExpandNode | ( | bst_node_t | nid, |
unsigned | split_index, | ||
bst_float | split_value, | ||
bool | default_left, | ||
bst_float | base_weight, | ||
bst_float | left_leaf_weight, | ||
bst_float | right_leaf_weight, | ||
bst_float | loss_change, | ||
float | sum_hess, | ||
float | left_sum, | ||
float | right_sum, | ||
bst_node_t | leaf_right_child = kInvalidNodeId |
||
) |
Expands a leaf node into two additional leaf nodes.
nid | The node index to expand. |
split_index | Feature index of the split. |
split_value | The split condition. |
default_left | True to default left. |
base_weight | The base weight, before learning rate. |
left_leaf_weight | The left leaf weight for prediction, modified by learning rate. |
right_leaf_weight | The right leaf weight for prediction, modified by learning rate. |
loss_change | The loss change. |
sum_hess | The sum of Hessians of the node. |
left_sum | The sum of Hessians of the left leaf. |
right_sum | The sum of Hessians of the right leaf. |
leaf_right_child | The right child index of the leaf; kInvalidNodeId by default. Some updaters use the right child index of a leaf as a marker. |
|
inline |
|
inline |
get current depth
nid | node id |
|
inline |
get const reference to nodes
bst_node_t xgboost::RegTree::GetNumLeaves | ( | ) | const |
bst_node_t xgboost::RegTree::GetNumSplitNodes | ( | ) | const |
|
inline |
|
inline |
|
inline |
Get split types for all nodes.
|
inline |
get const reference to stats
|
inline |
void xgboost::RegTree::Load | ( | dmlc::Stream * | fi | ) |
load model from stream
fi | input stream |
|
overridevirtual |
load the model from a JSON object
in | JSON object to load the model from |
Implements xgboost::Model.
|
inline |
get maximum depth
|
inline |
get maximum depth
nid | node id |
|
inline |
Get split type for a node.
nidx | Index of node. |
|
inline |
number of extra nodes besides the root
|
inline |
|
inline |
get node given nid
|
inline |
get node given nid
void xgboost::RegTree::Save | ( | dmlc::Stream * | fo | ) | const |
save model to stream
fo | output stream |
|
overridevirtual |
saves the model config to a JSON object
out | JSON container to save the model to |
Implements xgboost::Model.
|
inline |
get node statistics given nid
|
inline |
get node statistics given nid
void xgboost::RegTree::TreeShap | ( | const RegTree::FVec & | feat, |
bst_float * | phi, | ||
bst_node_t | node_index, | ||
unsigned | unique_depth, | ||
PathElement * | parent_unique_path, | ||
bst_float | parent_zero_fraction, | ||
bst_float | parent_one_fraction, | ||
int | parent_feature_index, | ||
int | condition, | ||
unsigned | condition_feature, | ||
bst_float | condition_fraction | ||
) | const |
Recursive function that computes the feature attributions for a single tree.
feat | dense feature vector, if the feature is missing the field is set to NaN |
phi | dense output vector of feature attributions |
node_index | the index of the current node in the tree |
unique_depth | how many unique features are above the current node in the tree |
parent_unique_path | a vector of statistics about our current path through the tree |
parent_zero_fraction | what fraction of the parent path weight is coming as 0 (integrated) |
parent_one_fraction | what fraction of the parent path weight is coming as 1 (fixed) |
parent_feature_index | what feature the parent node used to split |
condition | fix one feature to either off (-1) or on (1), or leave it not fixed (0, the default) |
condition_feature | the index of the feature to fix |
condition_fraction | what fraction of the current weight matches our conditioning feature |
|
inline |
|
staticconstexpr |
|
staticconstexpr |
|
staticconstexpr |
TreeParam xgboost::RegTree::param |
model parameter