xgboost
|
Internal data structure used by XGBoost during training. More...
#include <data.h>
Public Member Functions | |
DMatrix ()=default | |
default constructor More... | |
virtual MetaInfo & | Info ()=0 |
meta information of the dataset More... | |
virtual void | SetInfo (const char *key, const void *dptr, DataType dtype, size_t num) |
virtual void | SetInfo (const char *key, std::string const &interface_str) |
virtual const MetaInfo & | Info () const =0 |
meta information of the dataset More... | |
XGBAPIThreadLocalEntry & | GetThreadLocal () const |
Get thread local memory for returning data from DMatrix. More... | |
template<typename T > | |
BatchSet< T > | GetBatches (const BatchParam &param={}) |
Gets batches. Use range based for loop over BatchSet to access individual batches. More... | |
template<typename T > | |
bool | PageExists () const |
virtual bool | SingleColBlock () const =0 |
virtual | ~DMatrix () |
virtual destructor More... | |
bool | IsDense () const |
Whether the matrix is dense. More... | |
virtual DMatrix * | Slice (common::Span< int32_t const > ridxs)=0 |
template<> | |
BatchSet< SparsePage > | GetBatches (const BatchParam &) |
template<> | |
bool | PageExists () const |
template<> | |
bool | PageExists () const |
Static Public Member Functions | |
static DMatrix * | Load (const std::string &uri, bool silent, bool load_row_split, const std::string &file_format="auto") |
Load DMatrix from URI. More... | |
template<typename AdapterT > | |
static DMatrix * | Create (AdapterT *adapter, float missing, int nthread, const std::string &cache_prefix="") |
Creates a new DMatrix from an external data adapter. More... | |
template<typename DataIterHandle , typename DMatrixHandle , typename DataIterResetCallback , typename XGDMatrixCallbackNext > | |
static DMatrix * | Create (DataIterHandle iter, DMatrixHandle proxy, DataIterResetCallback *reset, XGDMatrixCallbackNext *next, float missing, int nthread, int max_bin) |
Create a new Quantile based DMatrix used for histogram based algorithm. More... | |
template<typename DataIterHandle , typename DMatrixHandle , typename DataIterResetCallback , typename XGDMatrixCallbackNext > | |
static DMatrix * | Create (DataIterHandle iter, DMatrixHandle proxy, DataIterResetCallback *reset, XGDMatrixCallbackNext *next, float missing, int32_t nthread, std::string cache) |
Create an external memory DMatrix with callbacks. More... | |
Static Public Attributes | |
static const size_t | kPageSize = 32UL << 12UL |
Number of rows per page in external memory. Approximately 100MB per page for a dataset with 100 features. More... | |
Protected Member Functions | |
virtual BatchSet< SparsePage > | GetRowBatches ()=0 |
virtual BatchSet< CSCPage > | GetColumnBatches ()=0 |
virtual BatchSet< SortedCSCPage > | GetSortedColumnBatches ()=0 |
virtual BatchSet< EllpackPage > | GetEllpackBatches (const BatchParam &param)=0 |
virtual BatchSet< GHistIndexMatrix > | GetGradientIndex (const BatchParam &param)=0 |
virtual bool | EllpackExists () const =0 |
virtual bool | SparsePageExists () const =0 |
Internal data structure used by XGBoost during training.
|
default |
default constructor
|
virtual |
virtual destructor
|
static |
Creates a new DMatrix from an external data adapter.
AdapterT | Type of the adapter. |
[in,out] | adapter | View onto an external data. |
missing | Values to count as missing. | |
nthread | Number of threads for construction. | |
cache_prefix | (Optional) The cache prefix for external memory. | |
page_size | (Optional) Size of the page. |
|
static |
Create a new Quantile based DMatrix used for histogram based algorithm.
DataIterHandle | External iterator type, defined in C API. |
DMatrixHandle | DMatrix handle, defined in C API. |
DataIterResetCallback | Callback for reset, prototype defined in C API. |
XGDMatrixCallbackNext | Callback for next, prototype defined in C API. |
iter | External data iterator |
proxy | A handle to ProxyDMatrix |
reset | Callback for reset |
next | Callback for next |
missing | Value that should be treated as missing. |
nthread | number of threads used for initialization. |
max_bin | Maximum number of bins. |
|
static |
Create an external memory DMatrix with callbacks.
DataIterHandle | External iterator type, defined in C API. |
DMatrixHandle | DMatrix handle, defined in C API. |
DataIterResetCallback | Callback for reset, prototype defined in C API. |
XGDMatrixCallbackNext | Callback for next, prototype defined in C API. |
iter | External data iterator |
proxy | A handle to ProxyDMatrix |
reset | Callback for reset |
next | Callback for next |
missing | Value that should be treated as missing. |
nthread | number of threads used for initialization. |
cache | Prefix of cache file path. |
|
protectedpure virtual |
|
inline |
BatchSet<T> xgboost::DMatrix::GetBatches | ( | const BatchParam & | param = {} | ) |
Gets batches. Use range based for loop over BatchSet to access individual batches.
|
protectedpure virtual |
|
protectedpure virtual |
|
protectedpure virtual |
|
protectedpure virtual |
XGBAPIThreadLocalEntry& xgboost::DMatrix::GetThreadLocal | ( | ) | const |
Get thread local memory for returning data from DMatrix.
|
pure virtual |
meta information of the dataset
|
pure virtual |
meta information of the dataset
|
inline |
Whether the matrix is dense.
|
static |
Load DMatrix from URI.
uri | The URI of input. |
silent | Whether to print information during loading. |
load_row_split | Flag to read in part of rows, divided among the workers in distributed mode. |
file_format | The format type of the file, used for dmlc::Parser::Create. By default "auto" will be able to load in both local binary file. |
page_size | Page size for external memory. |
bool xgboost::DMatrix::PageExists | ( | ) | const |
|
inline |
|
inline |
|
inlinevirtual |
|
inlinevirtual |
|
pure virtual |
|
pure virtual |
|
protectedpure virtual |
|
static |
Number of rows per page in external memory. Approximately 100MB per page for a dataset with 100 features.