xgboost
|
Meta information about the dataset; it always resides in memory. More...
#include <data.h>
Public Member Functions | |
MetaInfo ()=default | |
default constructor More... | |
MetaInfo (MetaInfo &&that)=default | |
MetaInfo & | operator= (MetaInfo &&that)=default |
MetaInfo & | operator= (MetaInfo const &that)=delete |
void | Validate (int32_t device) const |
Validate all metainfo. More... | |
MetaInfo | Slice (common::Span< int32_t const > ridxs) const |
MetaInfo | Copy () const |
bst_float | GetWeight (size_t i) const |
Get the weight of each instance. More... | |
const std::vector< size_t > & | LabelAbsSort (Context const *ctx) const |
get sorted indexes (argsort) of labels by absolute value (used by Cox loss) More... | |
void | Clear () |
clear all the information More... | |
void | LoadBinary (dmlc::Stream *fi) |
Load the Meta info from binary stream. More... | |
void | SaveBinary (dmlc::Stream *fo) const |
Save the Meta info to binary stream. More... | |
void | SetInfo (Context const &ctx, const char *key, const void *dptr, DataType dtype, size_t num) |
Set information in the meta info. More... | |
void | SetInfo (Context const &ctx, StringView key, StringView interface_str) |
Set information in the meta info with array interface. More... | |
void | GetInfo (char const *key, bst_ulong *out_len, DataType dtype, const void **out_dptr) const |
void | SetFeatureInfo (const char *key, const char **info, const bst_ulong size) |
void | GetFeatureInfo (const char *field, std::vector< std::string > *out_str_vecs) const |
void | Extend (MetaInfo const &that, bool accumulate_rows, bool check_column) |
void | SynchronizeNumberOfColumns () |
Synchronize the number of columns across all workers. More... | |
bool | IsRowSplit () const |
Whether the data is split row-wise. More... | |
bool | IsColumnSplit () const |
Whether the data is split column-wise. More... | |
bool | IsRanking () const |
Whether this is learning-to-rank data. More... | |
bool | IsVerticalFederated () const |
A convenient method to check if we are doing vertical federated learning, which requires some special processing. More... | |
bool | ShouldHaveLabels () const |
A convenient method to check if the MetaInfo should contain labels. More... | |
Public Attributes | |
uint64_t | num_row_ {0} |
number of rows in the data More... | |
uint64_t | num_col_ {0} |
number of columns in the data More... | |
uint64_t | num_nonzero_ {0} |
number of nonzero entries in the data More... | |
linalg::Tensor< float, 2 > | labels |
label of each instance More... | |
DataSplitMode | data_split_mode {DataSplitMode::kRow} |
data split mode More... | |
std::vector< bst_group_t > | group_ptr_ |
the begin and end indices of each group, needed when the learning task is ranking. More... | |
HostDeviceVector< bst_float > | weights_ |
weights of each instance, optional More... | |
linalg::Tensor< float, 2 > | base_margin_ |
initial margins; if specified, xgboost will start boosting from these margins. This can be used to specify the initial prediction to boost from. More... | |
HostDeviceVector< bst_float > | labels_lower_bound_ |
lower bound of the label, to be used for survival analysis (censored regression) More... | |
HostDeviceVector< bst_float > | labels_upper_bound_ |
upper bound of the label, to be used for survival analysis (censored regression) More... | |
std::vector< std::string > | feature_type_names |
Name of type for each feature provided by users. E.g. "int"/"float"/"i"/"q". More... | |
std::vector< std::string > | feature_names |
Name for each feature. More... | |
HostDeviceVector< FeatureType > | feature_types |
HostDeviceVector< float > | feature_weights |
Static Public Attributes | |
static constexpr uint64_t | kNumField = 12 |
number of data fields in MetaInfo More... | |
Meta information about the dataset; it always resides in memory.
|
default |
default constructor
|
default |
void xgboost::MetaInfo::Clear | ( | ) |
clear all the information
MetaInfo xgboost::MetaInfo::Copy | ( | ) | const |
void xgboost::MetaInfo::Extend | ( | MetaInfo const & | that, |
bool | accumulate_rows, | ||
bool | check_column | ||
) |
void xgboost::MetaInfo::GetFeatureInfo | ( | const char * | field, |
std::vector< std::string > * | out_str_vecs | ||
) | const |
void xgboost::MetaInfo::GetInfo | ( | char const * | key, |
bst_ulong * | out_len, | ||
DataType | dtype, | ||
const void ** | out_dptr | ||
) | const |
|
inline |
Get the weight of each instance.
i | Instance index. |
|
inline |
Whether the data is split column-wise.
|
inline |
Whether this is learning-to-rank data.
|
inline |
Whether the data is split row-wise.
bool xgboost::MetaInfo::IsVerticalFederated | ( | ) | const |
A convenient method to check if we are doing vertical federated learning, which requires some special processing.
const std::vector<size_t>& xgboost::MetaInfo::LabelAbsSort | ( | Context const * | ctx | ) | const |
get sorted indexes (argsort) of labels by absolute value (used by Cox loss)
void xgboost::MetaInfo::LoadBinary | ( | dmlc::Stream * | fi | ) |
Load the Meta info from binary stream.
fi | The input stream |
void xgboost::MetaInfo::SaveBinary | ( | dmlc::Stream * | fo | ) | const |
Save the Meta info to binary stream.
fo | The output stream. |
void xgboost::MetaInfo::SetFeatureInfo | ( | const char * | key, |
const char ** | info, | ||
const bst_ulong | size | ||
) |
void xgboost::MetaInfo::SetInfo | ( | Context const & | ctx, |
const char * | key, | ||
const void * | dptr, | ||
DataType | dtype, | ||
size_t | num | ||
) |
Set information in the meta info.
key | The key of the information. |
dptr | The data pointer of the source array. |
dtype | The type of the source data. |
num | Number of elements in the source array. |
void xgboost::MetaInfo::SetInfo | ( | Context const & | ctx, |
StringView | key, | ||
StringView | interface_str | ||
) |
Set information in the meta info with array interface.
key | The key of the information. |
interface_str | String representation of json format array interface. |
bool xgboost::MetaInfo::ShouldHaveLabels | ( | ) | const |
A convenient method to check if the MetaInfo should contain labels.
Normally we assume labels are available everywhere. The only exception is in vertical federated learning where labels are only available on worker 0.
MetaInfo xgboost::MetaInfo::Slice | ( | common::Span< int32_t const > | ridxs | ) | const |
void xgboost::MetaInfo::SynchronizeNumberOfColumns | ( | ) |
Synchronize the number of columns across all workers.
Normally we just need to find the maximum number of columns across all workers, but in vertical federated learning, since each worker loads its own list of columns, we need to sum them.
void xgboost::MetaInfo::Validate | ( | int32_t | device | ) | const |
Validate all metainfo.
linalg::Tensor<float, 2> xgboost::MetaInfo::base_margin_ |
initial margins; if specified, xgboost will start boosting from these margins. This can be used to specify the initial prediction to boost from.
DataSplitMode xgboost::MetaInfo::data_split_mode {DataSplitMode::kRow} |
data split mode
std::vector<std::string> xgboost::MetaInfo::feature_names |
Name for each feature.
std::vector<std::string> xgboost::MetaInfo::feature_type_names |
Name of type for each feature provided by users. E.g. "int"/"float"/"i"/"q".
HostDeviceVector<FeatureType> xgboost::MetaInfo::feature_types |
HostDeviceVector<float> xgboost::MetaInfo::feature_weights |
std::vector<bst_group_t> xgboost::MetaInfo::group_ptr_ |
the begin and end indices of each group, needed when the learning task is ranking.
|
static constexpr |
number of data fields in MetaInfo
linalg::Tensor<float, 2> xgboost::MetaInfo::labels |
label of each instance
HostDeviceVector<bst_float> xgboost::MetaInfo::labels_lower_bound_ |
lower bound of the label, to be used for survival analysis (censored regression)
HostDeviceVector<bst_float> xgboost::MetaInfo::labels_upper_bound_ |
upper bound of the label, to be used for survival analysis (censored regression)
uint64_t xgboost::MetaInfo::num_col_ {0} |
number of columns in the data
uint64_t xgboost::MetaInfo::num_nonzero_ {0} |
number of nonzero entries in the data
uint64_t xgboost::MetaInfo::num_row_ {0} |
number of rows in the data
HostDeviceVector<bst_float> xgboost::MetaInfo::weights_ |
weights of each instance, optional