xgboost
Public Member Functions | Static Public Member Functions | Static Public Attributes | Protected Member Functions | List of all members
xgboost::DMatrix Class Reference (abstract)

Internal data structure used by XGBoost during training. More...

#include <data.h>

Collaboration diagram for xgboost::DMatrix:
Collaboration graph

Public Member Functions

 DMatrix ()=default
 default constructor More...
 
virtual MetaInfo & Info ()=0
 meta information of the dataset More...
 
virtual void SetInfo (const char *key, const void *dptr, DataType dtype, size_t num)
 
virtual void SetInfo (const char *key, std::string const &interface_str)
 
virtual const MetaInfo & Info () const =0
 meta information of the dataset More...
 
XGBAPIThreadLocalEntry & GetThreadLocal () const
 Get thread local memory for returning data from DMatrix. More...
 
template<typename T >
BatchSet< T > GetBatches (const BatchParam &param={})
 Gets batches. Use range based for loop over BatchSet to access individual batches. More...
 
template<typename T >
bool PageExists () const
 
virtual bool SingleColBlock () const =0
 
virtual ~DMatrix ()
 virtual destructor More...
 
bool IsDense () const
 Whether the matrix is dense. More...
 
virtual DMatrix * Slice (common::Span< int32_t const > ridxs)=0
 
template<>
BatchSet< SparsePage > GetBatches (const BatchParam &)
 
template<>
bool PageExists () const
 
template<>
bool PageExists () const
 
template<>
BatchSet< CSCPage > GetBatches (const BatchParam &)
 
template<>
BatchSet< SortedCSCPage > GetBatches (const BatchParam &)
 
template<>
BatchSet< EllpackPage > GetBatches (const BatchParam &param)
 

Static Public Member Functions

static DMatrix * Load (const std::string &uri, bool silent, bool load_row_split, const std::string &file_format="auto", size_t page_size=kPageSize)
 Load DMatrix from URI. More...
 
template<typename AdapterT >
static DMatrix * Create (AdapterT *adapter, float missing, int nthread, const std::string &cache_prefix="", size_t page_size=kPageSize)
 Creates a new DMatrix from an external data adapter. More...
 
template<typename DataIterHandle , typename DMatrixHandle , typename DataIterResetCallback , typename XGDMatrixCallbackNext >
static DMatrix * Create (DataIterHandle iter, DMatrixHandle proxy, DataIterResetCallback *reset, XGDMatrixCallbackNext *next, float missing, int nthread, int max_bin)
 Create a new Quantile based DMatrix used for histogram based algorithm. More...
 

Static Public Attributes

static const size_t kPageSize = 32UL << 20UL
 page size 32 MB More...
 

Protected Member Functions

virtual BatchSet< SparsePage > GetRowBatches ()=0
 
virtual BatchSet< CSCPage > GetColumnBatches ()=0
 
virtual BatchSet< SortedCSCPage > GetSortedColumnBatches ()=0
 
virtual BatchSet< EllpackPage > GetEllpackBatches (const BatchParam &param)=0
 
virtual bool EllpackExists () const =0
 
virtual bool SparsePageExists () const =0
 

Detailed Description

Internal data structure used by XGBoost during training.

Constructor & Destructor Documentation

◆ DMatrix()

xgboost::DMatrix::DMatrix ( )
default

default constructor

◆ ~DMatrix()

virtual xgboost::DMatrix::~DMatrix ( )
virtual

virtual destructor

Member Function Documentation

◆ Create() [1/2]

template<typename AdapterT >
static DMatrix* xgboost::DMatrix::Create ( AdapterT *  adapter,
float  missing,
int  nthread,
const std::string &  cache_prefix = "",
size_t  page_size = kPageSize 
)
static

Creates a new DMatrix from an external data adapter.

Template Parameters
AdapterT Type of the adapter.
Parameters
[in,out] adapter View onto external data.
missing Values to count as missing.
nthread Number of threads for construction.
cache_prefix (Optional) The cache prefix for external memory.
page_size (Optional) Size of the page.
Returns
A created DMatrix.

◆ Create() [2/2]

template<typename DataIterHandle , typename DMatrixHandle , typename DataIterResetCallback , typename XGDMatrixCallbackNext >
static DMatrix* xgboost::DMatrix::Create ( DataIterHandle  iter,
DMatrixHandle  proxy,
DataIterResetCallback *  reset,
XGDMatrixCallbackNext *  next,
float  missing,
int  nthread,
int  max_bin 
)
static

Create a new Quantile based DMatrix used for histogram based algorithm.

Template Parameters
DataIterHandle External iterator type, defined in C API.
DMatrixHandle DMatrix handle, defined in C API.
DataIterResetCallback Callback for reset, prototype defined in C API.
XGDMatrixCallbackNext Callback for next, prototype defined in C API.
Parameters
iter External data iterator.
proxy A handle to ProxyDMatrix.
reset Callback for reset.
next Callback for next.
missing Value that should be treated as missing.
nthread Number of threads used for initialization.
max_bin Maximum number of bins.
Returns
A created quantile based DMatrix.

◆ EllpackExists()

virtual bool xgboost::DMatrix::EllpackExists ( ) const
protected pure virtual

◆ GetBatches() [1/5]

template<typename T >
BatchSet<T> xgboost::DMatrix::GetBatches ( const BatchParam &  param = {})

Gets batches. Use range based for loop over BatchSet to access individual batches.

◆ GetBatches() [2/5]

template<>
BatchSet<SparsePage> xgboost::DMatrix::GetBatches ( const BatchParam & )
inline

◆ GetBatches() [3/5]

template<>
BatchSet<CSCPage> xgboost::DMatrix::GetBatches ( const BatchParam & )
inline

◆ GetBatches() [4/5]

template<>
BatchSet<SortedCSCPage> xgboost::DMatrix::GetBatches ( const BatchParam & )
inline

◆ GetBatches() [5/5]

template<>
BatchSet<EllpackPage> xgboost::DMatrix::GetBatches ( const BatchParam &  param)
inline

◆ GetColumnBatches()

virtual BatchSet<CSCPage> xgboost::DMatrix::GetColumnBatches ( )
protected pure virtual

◆ GetEllpackBatches()

virtual BatchSet<EllpackPage> xgboost::DMatrix::GetEllpackBatches ( const BatchParam &  param)
protected pure virtual

◆ GetRowBatches()

virtual BatchSet<SparsePage> xgboost::DMatrix::GetRowBatches ( )
protected pure virtual

◆ GetSortedColumnBatches()

virtual BatchSet<SortedCSCPage> xgboost::DMatrix::GetSortedColumnBatches ( )
protected pure virtual

◆ GetThreadLocal()

XGBAPIThreadLocalEntry& xgboost::DMatrix::GetThreadLocal ( ) const

Get thread local memory for returning data from DMatrix.

◆ Info() [1/2]

virtual MetaInfo& xgboost::DMatrix::Info ( )
pure virtual

meta information of the dataset

◆ Info() [2/2]

virtual const MetaInfo& xgboost::DMatrix::Info ( ) const
pure virtual

meta information of the dataset

◆ IsDense()

bool xgboost::DMatrix::IsDense ( ) const
inline

Whether the matrix is dense.

◆ Load()

static DMatrix* xgboost::DMatrix::Load ( const std::string &  uri,
bool  silent,
bool  load_row_split,
const std::string &  file_format = "auto",
size_t  page_size = kPageSize 
)
static

Load DMatrix from URI.

Parameters
uri The URI of input.
silent Whether to print information during loading.
load_row_split Flag to read in part of rows, divided among the workers in distributed mode.
file_format The format type of the file, used for dmlc::Parser::Create. By default, "auto" is able to load a local binary file.
page_size Page size for external memory.
Returns
The created DMatrix.

◆ PageExists() [1/3]

template<typename T >
bool xgboost::DMatrix::PageExists ( ) const

◆ PageExists() [2/3]

template<>
bool xgboost::DMatrix::PageExists ( ) const
inline

◆ PageExists() [3/3]

template<>
bool xgboost::DMatrix::PageExists ( ) const
inline

◆ SetInfo() [1/2]

virtual void xgboost::DMatrix::SetInfo ( const char *  key,
const void *  dptr,
DataType  dtype,
size_t  num 
)
inline virtual

◆ SetInfo() [2/2]

virtual void xgboost::DMatrix::SetInfo ( const char *  key,
std::string const &  interface_str 
)
inline virtual

◆ SingleColBlock()

virtual bool xgboost::DMatrix::SingleColBlock ( ) const
pure virtual
Returns
Whether the data columns form a single column block.

◆ Slice()

virtual DMatrix* xgboost::DMatrix::Slice ( common::Span< int32_t const >  ridxs)
pure virtual

◆ SparsePageExists()

virtual bool xgboost::DMatrix::SparsePageExists ( ) const
protected pure virtual

Member Data Documentation

◆ kPageSize

const size_t xgboost::DMatrix::kPageSize = 32UL << 20UL
static

page size 32 MB


The documentation for this class was generated from the following file: