Internal data structure used by XGBoost during training. There are two ways to create a customized DMatrix that reads in a user-defined format.
More...
#include <data.h>
|
static DMatrix * | Load (const std::string &uri, bool silent, bool load_row_split, const std::string &file_format="auto", size_t page_size=kPageSize) |
| Load DMatrix from URI. More...
|
|
template<typename AdapterT > |
static DMatrix * | Create (AdapterT *adapter, float missing, int nthread, const std::string &cache_prefix="", size_t page_size=kPageSize) |
| Creates a new DMatrix from an external data adapter. More...
|
|
Internal data structure used by XGBoost during training. There are two ways to create a customized DMatrix that reads in a user-defined format.
- Provide a dmlc::Parser and pass it into DMatrix::Create.
- Alternatively, if the data can be represented by a URL, define a new dmlc::Parser and register it with DMLC_REGISTER_DATA_PARSER.
- This works best for a user-defined data input source, such as a database or a filesystem.
- Provide a DataSource that can be passed to DMatrix::Create. This can be used to re-use an in-memory data structure as a DMatrix.
◆ DMatrix()
xgboost::DMatrix::DMatrix |
( |
| ) |
|
|
default |
◆ ~DMatrix()
virtual xgboost::DMatrix::~DMatrix |
( |
| ) |
|
|
virtual default |
◆ Create()
template<typename AdapterT >
static DMatrix* xgboost::DMatrix::Create |
( |
AdapterT * |
adapter, |
|
|
float |
missing, |
|
|
int |
nthread, |
|
|
const std::string & |
cache_prefix = "" , |
|
|
size_t |
page_size = kPageSize |
|
) |
| |
|
static |
Creates a new DMatrix from an external data adapter.
- Template Parameters
-
AdapterT | Type of the adapter. |
- Parameters
-
[in,out] | adapter | View onto external data. |
| missing | Values to count as missing. |
| nthread | Number of threads for construction. |
| cache_prefix | (Optional) The cache prefix for external memory. |
| page_size | (Optional) Size of the page. |
- Returns
- The created DMatrix.
◆ EllpackExists()
virtual bool xgboost::DMatrix::EllpackExists |
( |
| ) |
const |
|
protected pure virtual |
◆ GetBatches() [1/5]
Gets batches. Use a range-based for loop over the BatchSet to access individual batches.
◆ GetBatches() [2/5]
◆ GetBatches() [3/5]
◆ GetBatches() [4/5]
◆ GetBatches() [5/5]
◆ GetColumnBatches()
◆ GetEllpackBatches()
◆ GetRowBatches()
◆ GetSortedColumnBatches()
◆ Info() [1/2]
virtual MetaInfo& xgboost::DMatrix::Info |
( |
| ) |
|
|
pure virtual |
meta information of the dataset
◆ Info() [2/2]
virtual const MetaInfo& xgboost::DMatrix::Info |
( |
| ) |
const |
|
pure virtual |
meta information of the dataset
◆ IsDense()
bool xgboost::DMatrix::IsDense |
( |
| ) |
const |
|
inline |
Whether the matrix is dense.
◆ Load()
static DMatrix* xgboost::DMatrix::Load |
( |
const std::string & |
uri, |
|
|
bool |
silent, |
|
|
bool |
load_row_split, |
|
|
const std::string & |
file_format = "auto" , |
|
|
size_t |
page_size = kPageSize |
|
) |
| |
|
static |
Load DMatrix from URI.
- Parameters
-
uri | The URI of input. |
silent | Whether to print information during loading. |
load_row_split | Flag to read in part of rows, divided among the workers in distributed mode. |
file_format | The format type of the file, used for dmlc::Parser::Create. With the default "auto", the loader is also able to load local binary files. |
page_size | Page size for external memory. |
- Returns
- The created DMatrix.
◆ PageExists() [1/3]
template<typename T >
bool xgboost::DMatrix::PageExists |
( |
| ) |
const |
◆ PageExists() [2/3]
template<>
bool xgboost::DMatrix::PageExists |
( |
| ) |
const |
|
inline |
◆ PageExists() [3/3]
template<>
bool xgboost::DMatrix::PageExists |
( |
| ) |
const |
|
inline |
◆ SingleColBlock()
virtual bool xgboost::DMatrix::SingleColBlock |
( |
| ) |
const |
|
pure virtual |
- Returns
- Whether the data columns form a single column block.
◆ Slice()
◆ SparsePageExists()
virtual bool xgboost::DMatrix::SparsePageExists |
( |
| ) |
const |
|
protected pure virtual |
◆ kPageSize
const size_t xgboost::DMatrix::kPageSize = 32UL << 20UL |
|
static |
The documentation for this class was generated from the following file:
- /workspace/include/xgboost/data.h