xgboost
Public Member Functions | Static Public Member Functions | Protected Member Functions | List of all members
xgboost::DMatrix Class Reference (abstract)

Internal data structure used by XGBoost to hold all external data. More...

#include <data.h>

Collaboration diagram for xgboost::DMatrix:
Collaboration graph

Public Member Functions

 DMatrix ()=default
 default constructor More...
 
virtual MetaInfo & Info ()=0
 meta information of the dataset More...
 
virtual void SetInfo (const char *key, std::string const &interface_str)
 
virtual const MetaInfo & Info () const =0
 meta information of the dataset More...
 
XGBAPIThreadLocalEntry & GetThreadLocal () const
 Get thread local memory for returning data from DMatrix. More...
 
virtual Context const * Ctx () const =0
 Get the context object of this DMatrix. The context is created during construction of DMatrix with user specified nthread parameter. More...
 
template<typename T >
BatchSet< T > GetBatches ()
 Gets batches. Use range based for loop over BatchSet to access individual batches. More...
 
template<typename T >
BatchSet< T > GetBatches (Context const *ctx)
 
template<typename T >
BatchSet< T > GetBatches (Context const *ctx, const BatchParam &param)
 
template<typename T >
bool PageExists () const
 
bool SingleColBlock () const
 
virtual std::int32_t NumBatches () const
 
virtual ~DMatrix ()
 
bool IsDense () const
 Whether the matrix is dense. More...
 
virtual DMatrix * Slice (common::Span< int32_t const > ridxs)=0
 
virtual DMatrix * SliceCol (int num_slices, int slice_id)=0
 Slice a DMatrix by columns. More...
 
template<>
BatchSet< SparsePage > GetBatches ()
 
template<>
bool PageExists () const
 
template<>
bool PageExists () const
 
template<>
bool PageExists () const
 
template<>
BatchSet< SparsePage > GetBatches (Context const *)
 
template<>
BatchSet< EllpackPage > GetBatches (Context const *ctx, BatchParam const &param)
 

Static Public Member Functions

static DMatrix * Load (const std::string &uri, bool silent=true, DataSplitMode data_split_mode=DataSplitMode::kRow)
 Load DMatrix from URI. More...
 
template<typename AdapterT >
static DMatrix * Create (AdapterT *adapter, float missing, int nthread, const std::string &cache_prefix="", DataSplitMode data_split_mode=DataSplitMode::kRow)
 Creates a new DMatrix from an external data adapter. More...
 
template<typename DataIterHandle , typename DMatrixHandle , typename DataIterResetCallback , typename XGDMatrixCallbackNext >
static DMatrix * Create (DataIterHandle iter, DMatrixHandle proxy, std::shared_ptr< DMatrix > ref, DataIterResetCallback *reset, XGDMatrixCallbackNext *next, float missing, std::int32_t nthread, bst_bin_t max_bin, std::int64_t max_quantile_blocks)
 Create a new Quantile based DMatrix used for histogram based algorithm. More...
 
template<typename DataIterHandle , typename DMatrixHandle , typename DataIterResetCallback , typename XGDMatrixCallbackNext >
static DMatrix * Create (DataIterHandle iter, DMatrixHandle proxy, DataIterResetCallback *reset, XGDMatrixCallbackNext *next, ExtMemConfig const &config)
 Create an external memory DMatrix with callbacks. More...
 
template<typename DataIterHandle , typename DMatrixHandle , typename DataIterResetCallback , typename XGDMatrixCallbackNext >
static DMatrix * Create (DataIterHandle iter, DMatrixHandle proxy, std::shared_ptr< DMatrix > ref, DataIterResetCallback *reset, XGDMatrixCallbackNext *next, bst_bin_t max_bin, std::int64_t max_quantile_blocks, ExtMemConfig const &config)
 Create an external memory quantile DMatrix with callbacks. More...
 

Protected Member Functions

virtual BatchSet< SparsePage > GetRowBatches ()=0
 
virtual BatchSet< CSCPage > GetColumnBatches (Context const *ctx)=0
 
virtual BatchSet< SortedCSCPage > GetSortedColumnBatches (Context const *ctx)=0
 
virtual BatchSet< EllpackPage > GetEllpackBatches (Context const *ctx, BatchParam const &param)=0
 
virtual BatchSet< GHistIndexMatrix > GetGradientIndex (Context const *ctx, BatchParam const &param)=0
 
virtual BatchSet< ExtSparsePage > GetExtBatches (Context const *ctx, BatchParam const &param)=0
 
virtual bool EllpackExists () const =0
 
virtual bool GHistIndexExists () const =0
 
virtual bool SparsePageExists () const =0
 

Detailed Description

Internal data structure used by XGBoost to hold all external data.

There are multiple variants of the DMatrix class, which can be accessed through the Create() methods. The DMatrix itself holds the predictor X, while other data, including labels and sample weights, are stored in the MetaInfo class.

Constructor & Destructor Documentation

◆ DMatrix()

xgboost::DMatrix::DMatrix ( )
default

default constructor

◆ ~DMatrix()

virtual xgboost::DMatrix::~DMatrix ( )
virtual

Member Function Documentation

◆ Create() [1/4]

template<typename AdapterT >
static DMatrix* xgboost::DMatrix::Create ( AdapterT *  adapter,
float  missing,
int  nthread,
const std::string &  cache_prefix = "",
DataSplitMode  data_split_mode = DataSplitMode::kRow 
)
static

Creates a new DMatrix from an external data adapter.

Template Parameters
AdapterT  Type of the adapter.
Parameters
[in,out] adapter  View onto external data.
missing  Values to count as missing.
nthread  Number of threads for construction.
cache_prefix  (Optional) The cache prefix for external memory.
data_split_mode  (Optional) Data split mode.
Returns
A created DMatrix.

◆ Create() [2/4]

template<typename DataIterHandle , typename DMatrixHandle , typename DataIterResetCallback , typename XGDMatrixCallbackNext >
static DMatrix* xgboost::DMatrix::Create ( DataIterHandle  iter,
DMatrixHandle  proxy,
DataIterResetCallback *  reset,
XGDMatrixCallbackNext *  next,
ExtMemConfig const &  config 
)
static

Create an external memory DMatrix with callbacks.

Template Parameters
DataIterHandle  External iterator type, defined in C API.
DMatrixHandle  DMatrix handle, defined in C API.
DataIterResetCallback  Callback for reset, prototype defined in C API.
XGDMatrixCallbackNext  Callback for next, prototype defined in C API.
Parameters
iter  External data iterator
proxy  A handle to ProxyDMatrix
reset  Callback for reset
next  Callback for next
config  Configuration for the cache.
Returns
A created external memory DMatrix.

◆ Create() [3/4]

template<typename DataIterHandle , typename DMatrixHandle , typename DataIterResetCallback , typename XGDMatrixCallbackNext >
static DMatrix* xgboost::DMatrix::Create ( DataIterHandle  iter,
DMatrixHandle  proxy,
std::shared_ptr< DMatrix >  ref,
DataIterResetCallback *  reset,
XGDMatrixCallbackNext *  next,
bst_bin_t  max_bin,
std::int64_t  max_quantile_blocks,
ExtMemConfig const &  config 
)
static

Create an external memory quantile DMatrix with callbacks.

Parameters are a combination of the external memory DMatrix and the quantile DMatrix.
Returns
A created external memory quantile DMatrix.

◆ Create() [4/4]

template<typename DataIterHandle , typename DMatrixHandle , typename DataIterResetCallback , typename XGDMatrixCallbackNext >
static DMatrix* xgboost::DMatrix::Create ( DataIterHandle  iter,
DMatrixHandle  proxy,
std::shared_ptr< DMatrix >  ref,
DataIterResetCallback *  reset,
XGDMatrixCallbackNext *  next,
float  missing,
std::int32_t  nthread,
bst_bin_t  max_bin,
std::int64_t  max_quantile_blocks 
)
static

Create a new Quantile based DMatrix used for histogram based algorithm.

Template Parameters
DataIterHandle  External iterator type, defined in C API.
DMatrixHandle  DMatrix handle, defined in C API.
DataIterResetCallback  Callback for reset, prototype defined in C API.
XGDMatrixCallbackNext  Callback for next, prototype defined in C API.
Parameters
iter  External data iterator
proxy  A handle to ProxyDMatrix
ref  Reference Quantile DMatrix.
reset  Callback for reset
next  Callback for next
missing  Value that should be treated as missing.
nthread  Number of threads used for initialization.
max_bin  Maximum number of bins.
Returns
A created quantile based DMatrix.

◆ Ctx()

virtual Context const* xgboost::DMatrix::Ctx ( ) const
pure virtual

Get the context object of this DMatrix. The context is created during construction of DMatrix with user specified nthread parameter.

◆ EllpackExists()

virtual bool xgboost::DMatrix::EllpackExists ( ) const
protected pure virtual

◆ GetBatches() [1/6]

template<typename T >
BatchSet<T> xgboost::DMatrix::GetBatches ( )

Gets batches. Use range based for loop over BatchSet to access individual batches.

◆ GetBatches() [2/6]

template<>
BatchSet<SparsePage> xgboost::DMatrix::GetBatches ( )
inline

◆ GetBatches() [3/6]

template<>
BatchSet< SortedCSCPage > xgboost::DMatrix::GetBatches ( Context const *  ctx)
inline

◆ GetBatches() [4/6]

template<typename T >
BatchSet<T> xgboost::DMatrix::GetBatches ( Context const *  ctx)

◆ GetBatches() [5/6]

template<>
BatchSet< ExtSparsePage > xgboost::DMatrix::GetBatches ( Context const *  ctx,
BatchParam const &  param 
)
inline

◆ GetBatches() [6/6]

template<typename T >
BatchSet<T> xgboost::DMatrix::GetBatches ( Context const *  ctx,
const BatchParam &  param 
)

◆ GetColumnBatches()

virtual BatchSet<CSCPage> xgboost::DMatrix::GetColumnBatches ( Context const *  ctx)
protected pure virtual

◆ GetEllpackBatches()

virtual BatchSet<EllpackPage> xgboost::DMatrix::GetEllpackBatches ( Context const *  ctx,
BatchParam const &  param 
)
protected pure virtual

◆ GetExtBatches()

virtual BatchSet<ExtSparsePage> xgboost::DMatrix::GetExtBatches ( Context const *  ctx,
BatchParam const &  param 
)
protected pure virtual

◆ GetGradientIndex()

virtual BatchSet<GHistIndexMatrix> xgboost::DMatrix::GetGradientIndex ( Context const *  ctx,
BatchParam const &  param 
)
protected pure virtual

◆ GetRowBatches()

virtual BatchSet<SparsePage> xgboost::DMatrix::GetRowBatches ( )
protected pure virtual

◆ GetSortedColumnBatches()

virtual BatchSet<SortedCSCPage> xgboost::DMatrix::GetSortedColumnBatches ( Context const *  ctx)
protected pure virtual

◆ GetThreadLocal()

XGBAPIThreadLocalEntry& xgboost::DMatrix::GetThreadLocal ( ) const

Get thread local memory for returning data from DMatrix.

◆ GHistIndexExists()

virtual bool xgboost::DMatrix::GHistIndexExists ( ) const
protected pure virtual

◆ Info() [1/2]

virtual const MetaInfo& xgboost::DMatrix::Info ( ) const
pure virtual

meta information of the dataset

◆ Info() [2/2]

virtual MetaInfo& xgboost::DMatrix::Info ( )
pure virtual

meta information of the dataset

◆ IsDense()

bool xgboost::DMatrix::IsDense ( ) const
inline

Whether the matrix is dense.

◆ Load()

static DMatrix* xgboost::DMatrix::Load ( const std::string &  uri,
bool  silent = true,
DataSplitMode  data_split_mode = DataSplitMode::kRow 
)
static

Load DMatrix from URI.

Parameters
uri  The URI of input.
silent  Whether print information during loading.
data_split_mode  Indicate how the data was split beforehand.
Returns
The created DMatrix.

◆ NumBatches()

virtual std::int32_t xgboost::DMatrix::NumBatches ( ) const
inline virtual

◆ PageExists() [1/4]

template<typename T >
bool xgboost::DMatrix::PageExists ( ) const

◆ PageExists() [2/4]

template<>
bool xgboost::DMatrix::PageExists ( ) const
inline

◆ PageExists() [3/4]

template<>
bool xgboost::DMatrix::PageExists ( ) const
inline

◆ PageExists() [4/4]

template<>
bool xgboost::DMatrix::PageExists ( ) const
inline

◆ SetInfo()

virtual void xgboost::DMatrix::SetInfo ( const char *  key,
std::string const &  interface_str 
)
inline virtual

◆ SingleColBlock()

bool xgboost::DMatrix::SingleColBlock ( ) const
inline
Returns
Whether the DMatrix contains a single batch.

The naming is legacy.

◆ Slice()

virtual DMatrix* xgboost::DMatrix::Slice ( common::Span< int32_t const >  ridxs)
pure virtual

◆ SliceCol()

virtual DMatrix* xgboost::DMatrix::SliceCol ( int  num_slices,
int  slice_id 
)
pure virtual

Slice a DMatrix by columns.

Parameters
num_slices  Total number of slices
slice_id  Index of the current slice
Returns
DMatrix containing the slice of columns

◆ SparsePageExists()

virtual bool xgboost::DMatrix::SparsePageExists ( ) const
protected pure virtual

The documentation for this class was generated from the following file: