xgboost
|
Internal data structure used by XGBoost to hold all external data. More...
#include <data.h>
Public Member Functions | |
DMatrix ()=default | |
default constructor More... | |
virtual MetaInfo & | Info ()=0 |
meta information of the dataset More... | |
virtual void | SetInfo (const char *key, std::string const &interface_str) |
virtual const MetaInfo & | Info () const =0 |
meta information of the dataset More... | |
XGBAPIThreadLocalEntry & | GetThreadLocal () const |
Get thread local memory for returning data from DMatrix. More... | |
virtual Context const * | Ctx () const =0 |
Get the context object of this DMatrix. The context is created during construction of the DMatrix with the user-specified nthread parameter. More... | |
template<typename T > | |
BatchSet< T > | GetBatches () |
Gets batches. Use a range-based for loop over BatchSet to access individual batches. More... | |
template<typename T > | |
BatchSet< T > | GetBatches (Context const *ctx) |
template<typename T > | |
BatchSet< T > | GetBatches (Context const *ctx, const BatchParam &param) |
template<typename T > | |
bool | PageExists () const |
bool | SingleColBlock () const |
virtual std::int32_t | NumBatches () const |
virtual | ~DMatrix () |
bool | IsDense () const |
Whether the matrix is dense. More... | |
virtual DMatrix * | Slice (common::Span< int32_t const > ridxs)=0 |
virtual DMatrix * | SliceCol (int num_slices, int slice_id)=0 |
Slice a DMatrix by columns. More... | |
template<> | |
BatchSet< SparsePage > | GetBatches () |
template<> | |
bool | PageExists () const |
template<> | |
bool | PageExists () const |
template<> | |
bool | PageExists () const |
template<> | |
BatchSet< SparsePage > | GetBatches (Context const *) |
template<> | |
BatchSet< EllpackPage > | GetBatches (Context const *ctx, BatchParam const &param) |
Static Public Member Functions | |
static DMatrix * | Load (const std::string &uri, bool silent=true, DataSplitMode data_split_mode=DataSplitMode::kRow) |
Load DMatrix from URI. More... | |
template<typename AdapterT > | |
static DMatrix * | Create (AdapterT *adapter, float missing, int nthread, const std::string &cache_prefix="", DataSplitMode data_split_mode=DataSplitMode::kRow) |
Creates a new DMatrix from an external data adapter. More... | |
template<typename DataIterHandle , typename DMatrixHandle , typename DataIterResetCallback , typename XGDMatrixCallbackNext > | |
static DMatrix * | Create (DataIterHandle iter, DMatrixHandle proxy, std::shared_ptr< DMatrix > ref, DataIterResetCallback *reset, XGDMatrixCallbackNext *next, float missing, std::int32_t nthread, bst_bin_t max_bin, std::int64_t max_quantile_blocks) |
Create a new Quantile based DMatrix used for histogram based algorithm. More... | |
template<typename DataIterHandle , typename DMatrixHandle , typename DataIterResetCallback , typename XGDMatrixCallbackNext > | |
static DMatrix * | Create (DataIterHandle iter, DMatrixHandle proxy, DataIterResetCallback *reset, XGDMatrixCallbackNext *next, ExtMemConfig const &config) |
Create an external memory DMatrix with callbacks. More... | |
template<typename DataIterHandle , typename DMatrixHandle , typename DataIterResetCallback , typename XGDMatrixCallbackNext > | |
static DMatrix * | Create (DataIterHandle iter, DMatrixHandle proxy, std::shared_ptr< DMatrix > ref, DataIterResetCallback *reset, XGDMatrixCallbackNext *next, bst_bin_t max_bin, std::int64_t max_quantile_blocks, ExtMemConfig const &config) |
Create an external memory quantile DMatrix with callbacks. More... | |
Protected Member Functions | |
virtual BatchSet< SparsePage > | GetRowBatches ()=0 |
virtual BatchSet< CSCPage > | GetColumnBatches (Context const *ctx)=0 |
virtual BatchSet< SortedCSCPage > | GetSortedColumnBatches (Context const *ctx)=0 |
virtual BatchSet< EllpackPage > | GetEllpackBatches (Context const *ctx, BatchParam const &param)=0 |
virtual BatchSet< GHistIndexMatrix > | GetGradientIndex (Context const *ctx, BatchParam const &param)=0 |
virtual BatchSet< ExtSparsePage > | GetExtBatches (Context const *ctx, BatchParam const &param)=0 |
virtual bool | EllpackExists () const =0 |
virtual bool | GHistIndexExists () const =0 |
virtual bool | SparsePageExists () const =0 |
Internal data structure used by XGBoost to hold all external data.
There are multiple variants of the DMatrix class, which can be accessed through the Create() methods. The DMatrix itself holds the predictor X, and other data including labels and sample weights are stored in the MetaInfo class.
|
default |
default constructor
|
virtual |
|
static |
Creates a new DMatrix from an external data adapter.
AdapterT | Type of the adapter. |
[in,out] | adapter | View onto external data. |
missing | Values to count as missing. | |
nthread | Number of threads for construction. | |
cache_prefix | (Optional) The cache prefix for external memory. | |
data_split_mode | (Optional) Data split mode. |
|
static |
Create an external memory DMatrix with callbacks.
DataIterHandle | External iterator type, defined in C API. |
DMatrixHandle | DMatrix handle, defined in C API. |
DataIterResetCallback | Callback for reset, prototype defined in C API. |
XGDMatrixCallbackNext | Callback for next, prototype defined in C API. |
iter | External data iterator |
proxy | A handle to ProxyDMatrix |
reset | Callback for reset |
next | Callback for next |
config | Configuration for the cache. |
|
static |
|
static |
Create a new Quantile based DMatrix used for histogram based algorithm.
DataIterHandle | External iterator type, defined in C API. |
DMatrixHandle | DMatrix handle, defined in C API. |
DataIterResetCallback | Callback for reset, prototype defined in C API. |
XGDMatrixCallbackNext | Callback for next, prototype defined in C API. |
iter | External data iterator |
proxy | A handle to ProxyDMatrix |
ref | Reference Quantile DMatrix. |
reset | Callback for reset |
next | Callback for next |
missing | Value that should be treated as missing. |
nthread | Number of threads used for initialization. |
max_bin | Maximum number of bins. |
|
pure virtual |
|
protected pure virtual |
BatchSet<T> xgboost::DMatrix::GetBatches | ( | ) |
Gets batches. Use a range-based for loop over BatchSet to access individual batches.
|
inline |
|
inline |
|
inline |
BatchSet<T> xgboost::DMatrix::GetBatches | ( | Context const * | ctx, |
const BatchParam & | param | ||
) |
|
protected pure virtual |
|
protected pure virtual |
|
protected pure virtual |
|
protected pure virtual |
|
protected pure virtual |
|
protected pure virtual |
XGBAPIThreadLocalEntry& xgboost::DMatrix::GetThreadLocal | ( | ) | const |
Get thread local memory for returning data from DMatrix.
|
protected pure virtual |
|
pure virtual |
meta information of the dataset
|
pure virtual |
meta information of the dataset
|
inline |
Whether the matrix is dense.
|
static |
|
inline virtual |
bool xgboost::DMatrix::PageExists | ( | ) | const |
|
inline |
|
inline |
|
inline |
|
inline virtual |
|
inline |
The naming is legacy.
|
pure virtual |
|
pure virtual |
|
protected pure virtual |