Internal data structure used by XGBoost during training. There are two ways to create a customized DMatrix that reads in a user-defined format.
More...
#include <data.h>
|
static DMatrix * | Load (const std::string &uri, bool silent, bool load_row_split, const std::string &file_format="auto", size_t page_size=kPageSize) |
| Load DMatrix from URI. More...
|
|
static DMatrix * | Create (std::unique_ptr< DataSource< SparsePage >> &&source, const std::string &cache_prefix="") |
| Create a new DMatrix by wrapping a row_iterator and meta info. More...
|
|
template<typename AdapterT > |
static DMatrix * | Create (AdapterT *adapter, float missing, int nthread, const std::string &cache_prefix="", size_t page_size=kPageSize) |
| Creates a new DMatrix from an external data adapter. More...
|
|
Internal data structure used by XGBoost during training. There are two ways to create a customized DMatrix that reads in a user-defined format.
- Provide a dmlc::Parser and pass it into DMatrix::Create.
- Alternatively, if the data can be represented by a URL, define a new dmlc::Parser and register it with DMLC_REGISTER_DATA_PARSER.
- This works best for user-defined data input sources, such as a database or a filesystem.
- Provide a DataSource that can be passed to DMatrix::Create. This can be used to re-use an in-memory data structure in a DMatrix.
◆ DMatrix()
xgboost::DMatrix::DMatrix |
( |
| ) |
|
|
default |
◆ ~DMatrix()
virtual xgboost::DMatrix::~DMatrix |
( |
| ) |
|
|
virtual default |
◆ Create() [1/2]
static DMatrix* xgboost::DMatrix::Create |
( |
std::unique_ptr< DataSource< SparsePage >> && |
source, |
|
|
const std::string & |
cache_prefix = "" |
|
) |
| |
|
static |
Create a new DMatrix by wrapping a row_iterator and meta info.
- Parameters
-
source | The source iterator of the data, the create function takes ownership of the source. |
cache_prefix | The path prefix of the temporary cache file of the DMatrix when used in external memory mode. This can be left empty (the default, "") for common cases, and in-memory mode will be used. |
- Returns
- The created DMatrix.
◆ Create() [2/2]
template<typename AdapterT >
static DMatrix* xgboost::DMatrix::Create |
( |
AdapterT * |
adapter, |
|
|
float |
missing, |
|
|
int |
nthread, |
|
|
const std::string & |
cache_prefix = "" , |
|
|
size_t |
page_size = kPageSize |
|
) |
| |
|
static |
Creates a new DMatrix from an external data adapter.
- Template Parameters
-
AdapterT | Type of the adapter. |
- Parameters
-
[in,out] | adapter | View onto an external data. |
| missing | Values to count as missing. |
| nthread | Number of threads for construction. |
| cache_prefix | (Optional) The cache prefix for external memory. |
| page_size | (Optional) Size of the page. |
- Returns
- The created DMatrix.
◆ GetBatches() [1/5]
Gets batches. Use a range-based for loop over the BatchSet to access individual batches.
◆ GetBatches() [2/5]
◆ GetBatches() [3/5]
◆ GetBatches() [4/5]
◆ GetBatches() [5/5]
◆ GetColDensity()
virtual float xgboost::DMatrix::GetColDensity |
( |
size_t |
cidx | ) |
|
|
pure virtual |
◆ GetColumnBatches()
◆ GetEllpackBatches()
◆ GetRowBatches()
◆ GetSortedColumnBatches()
◆ Info() [1/2]
virtual MetaInfo& xgboost::DMatrix::Info |
( |
| ) |
|
|
pure virtual |
meta information of the dataset
◆ Info() [2/2]
virtual const MetaInfo& xgboost::DMatrix::Info |
( |
| ) |
const |
|
pure virtual |
meta information of the dataset
◆ IsDense()
bool xgboost::DMatrix::IsDense |
( |
| ) |
const |
|
inline |
Whether the matrix is dense.
◆ Load()
static DMatrix* xgboost::DMatrix::Load |
( |
const std::string & |
uri, |
|
|
bool |
silent, |
|
|
bool |
load_row_split, |
|
|
const std::string & |
file_format = "auto" , |
|
|
size_t |
page_size = kPageSize |
|
) |
| |
|
static |
Load DMatrix from URI.
- Parameters
-
uri | The URI of input. |
silent | Whether to print information during loading. |
load_row_split | Flag to read in part of rows, divided among the workers in distributed mode. |
file_format | The format type of the file, used for dmlc::Parser::Create. By default, "auto" will also be able to load a local binary file. |
page_size | Page size for external memory. |
- Returns
- The created DMatrix.
◆ SaveToLocalFile()
virtual void xgboost::DMatrix::SaveToLocalFile |
( |
const std::string & |
fname | ) |
|
|
virtual |
Save the DMatrix to a local file. The saved file only works for non-sharded datasets (single-machine training). This API is deprecated and its use is discouraged.
- Parameters
-
fname | The file name to be saved. |
- Returns
- Nothing; this function is declared void.
◆ SingleColBlock()
virtual bool xgboost::DMatrix::SingleColBlock |
( |
| ) |
const |
|
pure virtual |
- Returns
- Whether the data columns form a single column block.
◆ kPageSize
const size_t xgboost::DMatrix::kPageSize = 32UL << 20UL |
|
static |
The documentation for this class was generated from the following file:
- /workspace/include/xgboost/data.h