xgboost
Public Member Functions | Static Public Member Functions | Protected Member Functions | List of all members
xgboost::DMatrix Class Reference abstract

Internal data structure used by XGBoost during training. More...

#include <data.h>

Collaboration diagram for xgboost::DMatrix:
Collaboration graph

Public Member Functions

 DMatrix ()=default
 default constructor More...
 
virtual MetaInfo & Info ()=0
 meta information of the dataset More...
 
virtual void SetInfo (const char *key, const void *dptr, DataType dtype, size_t num)
 
virtual void SetInfo (const char *key, std::string const &interface_str)
 
virtual const MetaInfo & Info () const =0
 meta information of the dataset More...
 
XGBAPIThreadLocalEntry & GetThreadLocal () const
 Get thread local memory for returning data from DMatrix. More...
 
virtual Context const * Ctx () const =0
 Get the context object of this DMatrix. The context is created during construction of DMatrix with user specified nthread parameter. More...
 
template<typename T >
BatchSet< T > GetBatches ()
 Gets batches. Use a range-based for loop over BatchSet to access individual batches. More...
 
template<typename T >
BatchSet< T > GetBatches (Context const *ctx)
 
template<typename T >
BatchSet< T > GetBatches (Context const *ctx, const BatchParam &param)
 
template<typename T >
bool PageExists () const
 
virtual bool SingleColBlock () const =0
 
virtual ~DMatrix ()
 virtual destructor More...
 
bool IsDense () const
 Whether the matrix is dense. More...
 
virtual DMatrix * Slice (common::Span< int32_t const > ridxs)=0
 
virtual DMatrix * SliceCol (int num_slices, int slice_id)=0
 Slice a DMatrix by columns. More...
 
template<>
BatchSet< SparsePage > GetBatches ()
 
template<>
bool PageExists () const
 
template<>
bool PageExists () const
 
template<>
bool PageExists () const
 
template<>
BatchSet< SparsePage > GetBatches (Context const *)
 
template<>
BatchSet< EllpackPage > GetBatches (Context const *ctx, BatchParam const &param)
 

Static Public Member Functions

static DMatrix * Load (const std::string &uri, bool silent=true, DataSplitMode data_split_mode=DataSplitMode::kRow)
 Load DMatrix from URI. More...
 
template<typename AdapterT >
static DMatrix * Create (AdapterT *adapter, float missing, int nthread, const std::string &cache_prefix="", DataSplitMode data_split_mode=DataSplitMode::kRow)
 Creates a new DMatrix from an external data adapter. More...
 
template<typename DataIterHandle , typename DMatrixHandle , typename DataIterResetCallback , typename XGDMatrixCallbackNext >
static DMatrix * Create (DataIterHandle iter, DMatrixHandle proxy, std::shared_ptr< DMatrix > ref, DataIterResetCallback *reset, XGDMatrixCallbackNext *next, float missing, int nthread, bst_bin_t max_bin)
 Create a new Quantile based DMatrix used for histogram based algorithm. More...
 
template<typename DataIterHandle , typename DMatrixHandle , typename DataIterResetCallback , typename XGDMatrixCallbackNext >
static DMatrix * Create (DataIterHandle iter, DMatrixHandle proxy, DataIterResetCallback *reset, XGDMatrixCallbackNext *next, float missing, int32_t nthread, std::string cache)
 Create an external memory DMatrix with callbacks. More...
 

Protected Member Functions

virtual BatchSet< SparsePage > GetRowBatches ()=0
 
virtual BatchSet< CSCPage > GetColumnBatches (Context const *ctx)=0
 
virtual BatchSet< SortedCSCPage > GetSortedColumnBatches (Context const *ctx)=0
 
virtual BatchSet< EllpackPage > GetEllpackBatches (Context const *ctx, BatchParam const &param)=0
 
virtual BatchSet< GHistIndexMatrix > GetGradientIndex (Context const *ctx, BatchParam const &param)=0
 
virtual BatchSet< ExtSparsePage > GetExtBatches (Context const *ctx, BatchParam const &param)=0
 
virtual bool EllpackExists () const =0
 
virtual bool GHistIndexExists () const =0
 
virtual bool SparsePageExists () const =0
 

Detailed Description

Internal data structure used by XGBoost during training.

Constructor & Destructor Documentation

◆ DMatrix()

xgboost::DMatrix::DMatrix ( )
default

default constructor

◆ ~DMatrix()

virtual xgboost::DMatrix::~DMatrix ( )
virtual

virtual destructor

Member Function Documentation

◆ Create() [1/3]

template<typename AdapterT >
static DMatrix* xgboost::DMatrix::Create ( AdapterT *  adapter,
float  missing,
int  nthread,
const std::string &  cache_prefix = "",
DataSplitMode  data_split_mode = DataSplitMode::kRow 
)
static

Creates a new DMatrix from an external data adapter.

Template Parameters
AdapterT  Type of the adapter.
Parameters
[in,out] adapter  View onto external data.
missing  Values to count as missing.
nthread  Number of threads for construction.
cache_prefix  (Optional) The cache prefix for external memory.
data_split_mode  (Optional) Data split mode.
Returns
A created DMatrix.

◆ Create() [2/3]

template<typename DataIterHandle , typename DMatrixHandle , typename DataIterResetCallback , typename XGDMatrixCallbackNext >
static DMatrix* xgboost::DMatrix::Create ( DataIterHandle  iter,
DMatrixHandle  proxy,
DataIterResetCallback *  reset,
XGDMatrixCallbackNext *  next,
float  missing,
int32_t  nthread,
std::string  cache 
)
static

Create an external memory DMatrix with callbacks.

Template Parameters
DataIterHandle  External iterator type, defined in C API.
DMatrixHandle  DMatrix handle, defined in C API.
DataIterResetCallback  Callback for reset, prototype defined in C API.
XGDMatrixCallbackNext  Callback for next, prototype defined in C API.
Parameters
iter  External data iterator
proxy  A handle to ProxyDMatrix
reset  Callback for reset
next  Callback for next
missing  Value that should be treated as missing.
nthread  Number of threads used for initialization.
cache  Prefix of cache file path.
Returns
A created external memory DMatrix.

◆ Create() [3/3]

template<typename DataIterHandle , typename DMatrixHandle , typename DataIterResetCallback , typename XGDMatrixCallbackNext >
static DMatrix* xgboost::DMatrix::Create ( DataIterHandle  iter,
DMatrixHandle  proxy,
std::shared_ptr< DMatrix >  ref,
DataIterResetCallback *  reset,
XGDMatrixCallbackNext *  next,
float  missing,
int  nthread,
bst_bin_t  max_bin 
)
static

Create a new Quantile based DMatrix used for histogram based algorithm.

Template Parameters
DataIterHandle  External iterator type, defined in C API.
DMatrixHandle  DMatrix handle, defined in C API.
DataIterResetCallback  Callback for reset, prototype defined in C API.
XGDMatrixCallbackNext  Callback for next, prototype defined in C API.
Parameters
iter  External data iterator
proxy  A handle to ProxyDMatrix
ref  Reference Quantile DMatrix.
reset  Callback for reset
next  Callback for next
missing  Value that should be treated as missing.
nthread  Number of threads used for initialization.
max_bin  Maximum number of bins.
Returns
A created quantile based DMatrix.

◆ Ctx()

virtual Context const* xgboost::DMatrix::Ctx ( ) const
pure virtual

Get the context object of this DMatrix. The context is created during construction of DMatrix with user specified nthread parameter.

◆ EllpackExists()

virtual bool xgboost::DMatrix::EllpackExists ( ) const
protected pure virtual

◆ GetBatches() [1/6]

template<typename T >
BatchSet<T> xgboost::DMatrix::GetBatches ( )

Gets batches. Use a range-based for loop over BatchSet to access individual batches.

◆ GetBatches() [2/6]

template<>
BatchSet<SparsePage> xgboost::DMatrix::GetBatches ( )
inline

◆ GetBatches() [3/6]

template<>
BatchSet< SortedCSCPage > xgboost::DMatrix::GetBatches ( Context const *  ctx)
inline

◆ GetBatches() [4/6]

template<typename T >
BatchSet<T> xgboost::DMatrix::GetBatches ( Context const *  ctx)

◆ GetBatches() [5/6]

template<>
BatchSet< ExtSparsePage > xgboost::DMatrix::GetBatches ( Context const *  ctx,
BatchParam const &  param 
)
inline

◆ GetBatches() [6/6]

template<typename T >
BatchSet<T> xgboost::DMatrix::GetBatches ( Context const *  ctx,
const BatchParam &  param 
)

◆ GetColumnBatches()

virtual BatchSet<CSCPage> xgboost::DMatrix::GetColumnBatches ( Context const *  ctx)
protected pure virtual

◆ GetEllpackBatches()

virtual BatchSet<EllpackPage> xgboost::DMatrix::GetEllpackBatches ( Context const *  ctx,
BatchParam const &  param 
)
protected pure virtual

◆ GetExtBatches()

virtual BatchSet<ExtSparsePage> xgboost::DMatrix::GetExtBatches ( Context const *  ctx,
BatchParam const &  param 
)
protected pure virtual

◆ GetGradientIndex()

virtual BatchSet<GHistIndexMatrix> xgboost::DMatrix::GetGradientIndex ( Context const *  ctx,
BatchParam const &  param 
)
protected pure virtual

◆ GetRowBatches()

virtual BatchSet<SparsePage> xgboost::DMatrix::GetRowBatches ( )
protected pure virtual

◆ GetSortedColumnBatches()

virtual BatchSet<SortedCSCPage> xgboost::DMatrix::GetSortedColumnBatches ( Context const *  ctx)
protected pure virtual

◆ GetThreadLocal()

XGBAPIThreadLocalEntry& xgboost::DMatrix::GetThreadLocal ( ) const

Get thread local memory for returning data from DMatrix.

◆ GHistIndexExists()

virtual bool xgboost::DMatrix::GHistIndexExists ( ) const
protected pure virtual

◆ Info() [1/2]

virtual const MetaInfo& xgboost::DMatrix::Info ( ) const
pure virtual

meta information of the dataset

◆ Info() [2/2]

virtual MetaInfo& xgboost::DMatrix::Info ( )
pure virtual

meta information of the dataset

◆ IsDense()

bool xgboost::DMatrix::IsDense ( ) const
inline

Whether the matrix is dense.

◆ Load()

static DMatrix* xgboost::DMatrix::Load ( const std::string &  uri,
bool  silent = true,
DataSplitMode  data_split_mode = DataSplitMode::kRow 
)
static

Load DMatrix from URI.

Parameters
uri  The URI of input.
silent  Whether to print information during loading.
data_split_mode  In distributed mode, split the input according to this mode; otherwise, it's just an indicator of how the input was split beforehand.
Returns
The created DMatrix.

◆ PageExists() [1/4]

template<typename T >
bool xgboost::DMatrix::PageExists ( ) const

◆ PageExists() [2/4]

template<>
bool xgboost::DMatrix::PageExists ( ) const
inline

◆ PageExists() [3/4]

template<>
bool xgboost::DMatrix::PageExists ( ) const
inline

◆ PageExists() [4/4]

template<>
bool xgboost::DMatrix::PageExists ( ) const
inline

◆ SetInfo() [1/2]

virtual void xgboost::DMatrix::SetInfo ( const char *  key,
const void *  dptr,
DataType  dtype,
size_t  num 
)
inline virtual

◆ SetInfo() [2/2]

virtual void xgboost::DMatrix::SetInfo ( const char *  key,
std::string const &  interface_str 
)
inline virtual

◆ SingleColBlock()

virtual bool xgboost::DMatrix::SingleColBlock ( ) const
pure virtual
Returns
Whether the data columns form a single column block.

◆ Slice()

virtual DMatrix* xgboost::DMatrix::Slice ( common::Span< int32_t const >  ridxs)
pure virtual

◆ SliceCol()

virtual DMatrix* xgboost::DMatrix::SliceCol ( int  num_slices,
int  slice_id 
)
pure virtual

Slice a DMatrix by columns.

Parameters
num_slices  Total number of slices
slice_id  Index of the current slice
Returns
DMatrix containing the slice of columns

◆ SparsePageExists()

virtual bool xgboost::DMatrix::SparsePageExists ( ) const
protected pure virtual

The documentation for this class was generated from the following file: