xgboost
c_api.h
Go to the documentation of this file.
1 
7 #ifndef XGBOOST_C_API_H_
8 #define XGBOOST_C_API_H_
9 
10 #ifdef __cplusplus
11 #define XGB_EXTERN_C extern "C"
12 #include <cstdio>
13 #include <cstdint>
14 #else
15 #define XGB_EXTERN_C
16 #include <stdio.h>
17 #include <stdint.h>
18 #endif // __cplusplus
19 
20 #if defined(_MSC_VER) || defined(_WIN32)
21 #define XGB_DLL XGB_EXTERN_C __declspec(dllexport)
22 #else
23 #define XGB_DLL XGB_EXTERN_C __attribute__ ((visibility ("default")))
24 #endif // defined(_MSC_VER) || defined(_WIN32)
25 
26 // manually define unsigned long
27 typedef uint64_t bst_ulong; // NOLINT(*)
28 
30 typedef void *DMatrixHandle; // NOLINT(*)
32 typedef void *BoosterHandle; // NOLINT(*)
33 
43 XGB_DLL void XGBoostVersion(int* major, int* minor, int* patch);
44 
55 XGB_DLL const char *XGBGetLastError(void);
56 
64 XGB_DLL int XGBRegisterLogCallback(void (*callback)(const char*));
65 
73 XGB_DLL int XGDMatrixCreateFromFile(const char *fname,
74  int silent,
75  DMatrixHandle *out);
76 
88 XGB_DLL int XGDMatrixCreateFromCSREx(const size_t* indptr,
89  const unsigned* indices,
90  const float* data,
91  size_t nindptr,
92  size_t nelem,
93  size_t num_col,
94  DMatrixHandle* out);
106 XGB_DLL int XGDMatrixCreateFromCSCEx(const size_t* col_ptr,
107  const unsigned* indices,
108  const float* data,
109  size_t nindptr,
110  size_t nelem,
111  size_t num_row,
112  DMatrixHandle* out);
113 
123 XGB_DLL int XGDMatrixCreateFromMat(const float *data,
124  bst_ulong nrow,
125  bst_ulong ncol,
126  float missing,
127  DMatrixHandle *out);
138 XGB_DLL int XGDMatrixCreateFromMat_omp(const float *data, // NOLINT
139  bst_ulong nrow, bst_ulong ncol,
140  float missing, DMatrixHandle *out,
141  int nthread);
152 XGB_DLL int XGDMatrixCreateFromDT(void** data,
153  const char ** feature_stypes,
154  bst_ulong nrow,
155  bst_ulong ncol,
156  DMatrixHandle* out,
157  int nthread);
158 
159 /*
160  * ========================== Begin data callback APIs =========================
161  *
162  * Short notes for data callback
163  *
164  * There are 2 sets of data callbacks for DMatrix. The first one is currently exclusively
165  * used by JVM packages. It uses `XGBoostBatchCSR` to accept batches of CSR-formatted
166  * input, and concatenates them into 1 final big CSR. The related functions are:
167  *
168  * - XGBCallbackSetData
169  * - XGBCallbackDataIterNext
170  * - XGDMatrixCreateFromDataIter
171  *
172  * Another set is used by Quantile based DMatrix (used by hist algorithm) for reducing
173  * memory usage. Currently only GPU implementation is available. It accepts foreign data
174  * iterators as callbacks and works similarly to external memory. For GPU Hist, the data is
175  * first compressed by quantile sketching then merged. This is particularly useful for
176  * distributed settings as it eliminates 2 copies of data: 1 by a `concat` from external
177  * library to make the data into a blob for normal DMatrix initialization, another by the
178  * internal CSR copy of DMatrix. Related functions are:
179  *
180  * - XGProxyDMatrixCreate
181  * - XGDMatrixCallbackNext
182  * - DataIterResetCallback
183  * - XGDeviceQuantileDMatrixSetDataCudaArrayInterface
184  * - XGDeviceQuantileDMatrixSetDataCudaColumnar
185  * - ... (data setters)
186  */
187 
188 /* ==== First set of callback functions, used exclusively by JVM packages. ==== */
189 
191 typedef void *DataIterHandle; // NOLINT(*)
193 typedef void *DataHolderHandle; // NOLINT(*)
194 
195 
197 typedef struct { // NOLINT(*)
199  size_t size;
200  /* \brief number of columns in the minibatch. */
201  size_t columns;
203 #ifdef __APPLE__
204  /* Necessary as Java on MacOS defines jlong as long int
205  * and gcc defines int64_t as long long int. */
206  long* offset; // NOLINT(*)
207 #else
208  int64_t* offset; // NOLINT(*)
209 #endif // __APPLE__
210 
211  float* label;
213  float* weight;
215  int* index;
217  float* value;
219 
225 XGB_EXTERN_C typedef int XGBCallbackSetData( // NOLINT(*)
226  DataHolderHandle handle, XGBoostBatchCSR batch);
227 
239 XGB_EXTERN_C typedef int XGBCallbackDataIterNext( // NOLINT(*)
240  DataIterHandle data_handle, XGBCallbackSetData *set_function,
241  DataHolderHandle set_function_handle);
242 
252  DataIterHandle data_handle,
253  XGBCallbackDataIterNext* callback,
254  const char* cache_info,
255  DMatrixHandle *out);
256 
257 /* == Second set of callback functions, used for constructing Quantile based DMatrix. ==
258  *
259  * Short note on how to use the second set of callbacks for GPU Hist tree method.
260  *
261  * Step 0: Define a data iterator with 2 methods, `reset` and `next`.
262  * Step 1: Create a DMatrix proxy by `XGProxyDMatrixCreate` and hold the handle.
263  * Step 2: Pass the iterator handle, proxy handle and 2 methods into
264  * `XGDeviceQuantileDMatrixCreateFromCallback`.
265  * Step 3: Call appropriate data setters in `next` functions.
266  *
267  * See test_iterative_device_dmatrix.cu or Python interface for examples.
268  */
269 
278 
286 XGB_EXTERN_C typedef int XGDMatrixCallbackNext(DataIterHandle iter); // NOLINT(*)
287 
291 XGB_EXTERN_C typedef void DataIterResetCallback(DataIterHandle handle); // NOLINT(*)
292 
309  XGDMatrixCallbackNext *next, float missing, int nthread, int max_bin,
310  DMatrixHandle *out);
321  DMatrixHandle handle,
322  const char* c_interface_str);
333  DMatrixHandle handle,
334  const char* c_interface_str);
335 /*
336  * =========================== End data callback APIs ==========================
337  */
338 
339 
340 
350  const int *idxset,
351  bst_ulong len,
352  DMatrixHandle *out);
363  const int *idxset,
364  bst_ulong len,
365  DMatrixHandle *out,
366  int allow_groups);
380  const char *fname, int silent);
381 
390  char const* field,
391  char const* c_interface_str);
392 
402  const char *field,
403  const float *array,
404  bst_ulong len);
414  const char *field,
415  const unsigned *array,
416  bst_ulong len);
417 
443 XGB_DLL int XGDMatrixSetStrFeatureInfo(DMatrixHandle handle, const char *field,
444  const char **features,
445  const bst_ulong size);
446 
482 XGB_DLL int XGDMatrixGetStrFeatureInfo(DMatrixHandle handle, const char *field,
483  bst_ulong *size,
484  const char ***out_features);
485 
511 XGB_DLL int XGDMatrixSetDenseInfo(DMatrixHandle handle, const char *field,
512  void *data, bst_ulong size, int type);
513 
522  const unsigned *group,
523  bst_ulong len);
524 
534  const char *field,
535  bst_ulong* out_len,
536  const float **out_dptr);
546  const char *field,
547  bst_ulong* out_len,
548  const unsigned **out_dptr);
556  bst_ulong *out);
564  bst_ulong *out);
565 // --- start XGBoost class
573 XGB_DLL int XGBoosterCreate(const DMatrixHandle dmats[],
574  bst_ulong len,
575  BoosterHandle *out);
582 
596 XGB_DLL int XGBoosterSlice(BoosterHandle handle, int begin_layer,
597  int end_layer, int step,
598  BoosterHandle *out);
599 
608  const char *name,
609  const char *value);
610 
617  bst_ulong *out);
618 
627  int iter,
628  DMatrixHandle dtrain);
640  DMatrixHandle dtrain,
641  float *grad,
642  float *hess,
643  bst_ulong len);
655  int iter,
656  DMatrixHandle dmats[],
657  const char *evnames[],
658  bst_ulong len,
659  const char **out_result);
684  DMatrixHandle dmat,
685  int option_mask,
686  unsigned ntree_limit,
687  int training,
688  bst_ulong *out_len,
689  const float **out_result);
690 
691 /*
692  * ========================== Begin Serialization APIs =========================
693  */
694 /*
695  * Short note for serialization APIs. There are 3 different sets of serialization APIs.
696  *
697  * - Functions with the term "Model" handle saving/loading XGBoost model like trees or
698  * linear weights, stripping out parameter configuration like training algorithms or
699  * CUDA device ID. These functions are designed to let users reuse the trained model
700  * for different tasks; examples are prediction, training continuation or model
701  * interpretation.
702  *
703  * - Functions with the term "Config" handle saving/loading configuration. They help users
704  * to study the internals of XGBoost. Users can also use the load method for specifying
705  * parameters in a structured way. These functions were introduced in 1.0.0, and are not
706  * yet stable.
707  *
708  * - Functions with the term "Serialization" combine the above two. They are used in
709  * situations like check-pointing, or continuing a training task in a distributed
710  * environment. In these cases the task must be carried out without any user
711  * intervention.
712  */
713 
721  const char *fname);
729  const char *fname);
738  const void *buf,
739  bst_ulong len);
749  const char **out_dptr);
750 
761  const char **out_dptr);
772  const void *buf, bst_ulong len);
773 
782  int* version);
783 
790 
791 
805  char const **out_str);
816  char const *json_parameters);
817 /*
818  * =========================== End Serialization APIs ==========================
819  */
820 
821 
832  const char *fmap,
833  int with_stats,
834  bst_ulong *out_len,
835  const char ***out_dump_array);
836 
848  const char *fmap,
849  int with_stats,
850  const char *format,
851  bst_ulong *out_len,
852  const char ***out_dump_array);
853 
866  int fnum,
867  const char **fname,
868  const char **ftype,
869  int with_stats,
870  bst_ulong *out_len,
871  const char ***out_models);
872 
886  int fnum,
887  const char **fname,
888  const char **ftype,
889  int with_stats,
890  const char *format,
891  bst_ulong *out_len,
892  const char ***out_models);
893 
903  const char* key,
904  const char** out,
905  int *success);
916  const char* key,
917  const char* value);
926  bst_ulong* out_len,
927  const char*** out);
928 #endif // XGBOOST_C_API_H_
XGBoosterLoadRabitCheckpoint
XGB_DLL int XGBoosterLoadRabitCheckpoint(BoosterHandle handle, int *version)
Initialize the booster from rabit checkpoint. This is used in distributed training API.
XGDMatrixCreateFromDT
XGB_DLL int XGDMatrixCreateFromDT(void **data, const char **feature_stypes, bst_ulong nrow, bst_ulong ncol, DMatrixHandle *out, int nthread)
create matrix content from python data table
XGDeviceQuantileDMatrixSetDataCudaArrayInterface
XGB_DLL int XGDeviceQuantileDMatrixSetDataCudaArrayInterface(DMatrixHandle handle, const char *c_interface_str)
Set data on a DMatrix proxy.
XGBoosterLoadJsonConfig
XGB_DLL int XGBoosterLoadJsonConfig(BoosterHandle handle, char const *json_parameters)
Load XGBoost's internal configuration from a JSON document. Currently the support is experimental,...
XGDMatrixCreateFromFile
XGB_DLL int XGDMatrixCreateFromFile(const char *fname, int silent, DMatrixHandle *out)
load a data matrix
XGBCallbackSetData
XGB_EXTERN_C typedef int XGBCallbackSetData(DataHolderHandle handle, XGBoostBatchCSR batch)
Callback to set the data to handle.
XGB_EXTERN_C
#define XGB_EXTERN_C
Definition: c_api.h:15
XGBRegisterLogCallback
XGB_DLL int XGBRegisterLogCallback(void(*callback)(const char *))
register callback function for LOG(INFO) messages – helpful messages that are not errors....
XGBoosterUnserializeFromBuffer
XGB_DLL int XGBoosterUnserializeFromBuffer(BoosterHandle handle, const void *buf, bst_ulong len)
Memory snapshot based serialization method. Loads the buffer returned from ‘XGBoosterSerializeToBuffe...
XGBoosterPredict
XGB_DLL int XGBoosterPredict(BoosterHandle handle, DMatrixHandle dmat, int option_mask, unsigned ntree_limit, int training, bst_ulong *out_len, const float **out_result)
make prediction based on dmat
XGDMatrixNumCol
XGB_DLL int XGDMatrixNumCol(DMatrixHandle handle, bst_ulong *out)
get number of columns
XGDMatrixCreateFromCSCEx
XGB_DLL int XGDMatrixCreateFromCSCEx(const size_t *col_ptr, const unsigned *indices, const float *data, size_t nindptr, size_t nelem, size_t num_row, DMatrixHandle *out)
create a matrix content from CSC format
XGDMatrixSliceDMatrixEx
XGB_DLL int XGDMatrixSliceDMatrixEx(DMatrixHandle handle, const int *idxset, bst_ulong len, DMatrixHandle *out, int allow_groups)
create a new dmatrix from sliced content of existing matrix
XGBoostBatchCSR::index
int * index
feature index
Definition: c_api.h:215
XGDMatrixSetInfoFromInterface
XGB_DLL int XGDMatrixSetInfoFromInterface(DMatrixHandle handle, char const *field, char const *c_interface_str)
Set content in array interface to a content in info.
XGBoosterSaveModel
XGB_DLL int XGBoosterSaveModel(BoosterHandle handle, const char *fname)
Save model into existing file.
XGDeviceQuantileDMatrixCreateFromCallback
XGB_DLL int XGDeviceQuantileDMatrixCreateFromCallback(DataIterHandle iter, DMatrixHandle proxy, DataIterResetCallback *reset, XGDMatrixCallbackNext *next, float missing, int nthread, int max_bin, DMatrixHandle *out)
Create a device DMatrix with data iterator.
XGBoostBatchCSR
Mini batch used in XGBoost Data Iteration.
Definition: c_api.h:197
XGDMatrixSaveBinary
XGB_DLL int XGDMatrixSaveBinary(DMatrixHandle handle, const char *fname, int silent)
save a data matrix into binary file
XGDMatrixSliceDMatrix
XGB_DLL int XGDMatrixSliceDMatrix(DMatrixHandle handle, const int *idxset, bst_ulong len, DMatrixHandle *out)
create a new dmatrix from sliced content of existing matrix
XGBoostBatchCSR::size
size_t size
number of rows in the minibatch
Definition: c_api.h:199
XGBoosterSetParam
XGB_DLL int XGBoosterSetParam(BoosterHandle handle, const char *name, const char *value)
set parameters
XGBoostBatchCSR::value
float * value
feature values
Definition: c_api.h:217
XGBGetLastError
XGB_DLL const char * XGBGetLastError(void)
get string message of the last error
DataIterHandle
void * DataIterHandle
handle to an external data iterator
Definition: c_api.h:191
XGDMatrixCreateFromMat_omp
XGB_DLL int XGDMatrixCreateFromMat_omp(const float *data, bst_ulong nrow, bst_ulong ncol, float missing, DMatrixHandle *out, int nthread)
create matrix content from dense matrix
XGBoosterDumpModelEx
XGB_DLL int XGBoosterDumpModelEx(BoosterHandle handle, const char *fmap, int with_stats, const char *format, bst_ulong *out_len, const char ***out_dump_array)
dump model, return array of strings representing model dump
XGBoosterDumpModel
XGB_DLL int XGBoosterDumpModel(BoosterHandle handle, const char *fmap, int with_stats, bst_ulong *out_len, const char ***out_dump_array)
dump model, return array of strings representing model dump
XGDMatrixCallbackNext
XGB_EXTERN_C typedef int XGDMatrixCallbackNext(DataIterHandle iter)
Callback function prototype for getting next batch of data.
XGBoostBatchCSR::label
float * label
labels of each instance
Definition: c_api.h:211
XGB_DLL
#define XGB_DLL
Definition: c_api.h:23
XGDMatrixGetFloatInfo
XGB_DLL int XGDMatrixGetFloatInfo(const DMatrixHandle handle, const char *field, bst_ulong *out_len, const float **out_dptr)
get float info vector from matrix.
XGBoosterFree
XGB_DLL int XGBoosterFree(BoosterHandle handle)
free obj in handle
XGBoosterGetNumFeature
XGB_DLL int XGBoosterGetNumFeature(BoosterHandle handle, bst_ulong *out)
get number of features
XGDMatrixSetUIntInfo
XGB_DLL int XGDMatrixSetUIntInfo(DMatrixHandle handle, const char *field, const unsigned *array, bst_ulong len)
set uint32 vector to a content in info
DataIterResetCallback
XGB_EXTERN_C typedef void DataIterResetCallback(DataIterHandle handle)
Callback function prototype for resetting external iterator.
XGBoostVersion
XGB_DLL void XGBoostVersion(int *major, int *minor, int *patch)
Return the version of the XGBoost library being currently used.
XGDMatrixNumRow
XGB_DLL int XGDMatrixNumRow(DMatrixHandle handle, bst_ulong *out)
get number of rows.
XGBoosterLoadModelFromBuffer
XGB_DLL int XGBoosterLoadModelFromBuffer(BoosterHandle handle, const void *buf, bst_ulong len)
load model from in memory buffer
XGBoosterDumpModelWithFeatures
XGB_DLL int XGBoosterDumpModelWithFeatures(BoosterHandle handle, int fnum, const char **fname, const char **ftype, int with_stats, bst_ulong *out_len, const char ***out_models)
dump model, return array of strings representing model dump
DataHolderHandle
void * DataHolderHandle
handle to an internal data holder.
Definition: c_api.h:193
XGBCallbackDataIterNext
XGB_EXTERN_C typedef int XGBCallbackDataIterNext(DataIterHandle data_handle, XGBCallbackSetData *set_function, DataHolderHandle set_function_handle)
The data reading callback function. The iterator will be able to give subset of batch in the data.
XGBoostBatchCSR::columns
size_t columns
Definition: c_api.h:201
XGDMatrixSetGroup
XGB_DLL int XGDMatrixSetGroup(DMatrixHandle handle, const unsigned *group, bst_ulong len)
(deprecated) Use XGDMatrixSetUIntInfo instead. Set group of the training matrix
XGBoosterSaveRabitCheckpoint
XGB_DLL int XGBoosterSaveRabitCheckpoint(BoosterHandle handle)
Save the current checkpoint to rabit.
XGDMatrixSetStrFeatureInfo
XGB_DLL int XGDMatrixSetStrFeatureInfo(DMatrixHandle handle, const char *field, const char **features, const bst_ulong size)
Set string encoded information of all features.
XGBoosterGetAttrNames
XGB_DLL int XGBoosterGetAttrNames(BoosterHandle handle, bst_ulong *out_len, const char ***out)
Get the names of all attributes from Booster.
XGDMatrixSetFloatInfo
XGB_DLL int XGDMatrixSetFloatInfo(DMatrixHandle handle, const char *field, const float *array, bst_ulong len)
set float vector to a content in info
XGBoosterGetModelRaw
XGB_DLL int XGBoosterGetModelRaw(BoosterHandle handle, bst_ulong *out_len, const char **out_dptr)
save model into binary raw bytes, return header of the array user must copy the result out,...
XGBoostBatchCSR::offset
int64_t * offset
row pointer to the rows in the data
Definition: c_api.h:208
XGBoosterLoadModel
XGB_DLL int XGBoosterLoadModel(BoosterHandle handle, const char *fname)
Load model from existing file.
XGDMatrixSetDenseInfo
XGB_DLL int XGDMatrixSetDenseInfo(DMatrixHandle handle, const char *field, void *data, bst_ulong size, int type)
Set meta info from dense matrix. Valid field names are:
XGBoosterBoostOneIter
XGB_DLL int XGBoosterBoostOneIter(BoosterHandle handle, DMatrixHandle dtrain, float *grad, float *hess, bst_ulong len)
update the model by directly specifying the gradient and second order gradient; this can be used to replace...
XGBoosterGetAttr
XGB_DLL int XGBoosterGetAttr(BoosterHandle handle, const char *key, const char **out, int *success)
Get string attribute from Booster.
XGDeviceQuantileDMatrixSetDataCudaColumnar
XGB_DLL int XGDeviceQuantileDMatrixSetDataCudaColumnar(DMatrixHandle handle, const char *c_interface_str)
Set data on a DMatrix proxy.
XGBoostBatchCSR::weight
float * weight
weight of each instance, can be NULL
Definition: c_api.h:213
XGBoosterUpdateOneIter
XGB_DLL int XGBoosterUpdateOneIter(BoosterHandle handle, int iter, DMatrixHandle dtrain)
update the model in one round using dtrain
DMatrixHandle
void * DMatrixHandle
handle to DMatrix
Definition: c_api.h:30
BoosterHandle
void * BoosterHandle
handle to Booster
Definition: c_api.h:32
XGBoosterSaveJsonConfig
XGB_DLL int XGBoosterSaveJsonConfig(BoosterHandle handle, bst_ulong *out_len, char const **out_str)
Save XGBoost's internal configuration into a JSON document. Currently the support is experimental,...
XGDMatrixCreateFromCSREx
XGB_DLL int XGDMatrixCreateFromCSREx(const size_t *indptr, const unsigned *indices, const float *data, size_t nindptr, size_t nelem, size_t num_col, DMatrixHandle *out)
create a matrix content from CSR format
XGBoosterSetAttr
XGB_DLL int XGBoosterSetAttr(BoosterHandle handle, const char *key, const char *value)
Set or delete string attribute.
XGBoosterEvalOneIter
XGB_DLL int XGBoosterEvalOneIter(BoosterHandle handle, int iter, DMatrixHandle dmats[], const char *evnames[], bst_ulong len, const char **out_result)
get evaluation statistics for xgboost
XGDMatrixCreateFromDataIter
XGB_DLL int XGDMatrixCreateFromDataIter(DataIterHandle data_handle, XGBCallbackDataIterNext *callback, const char *cache_info, DMatrixHandle *out)
Create a DMatrix from a data iterator.
bst_ulong
uint64_t bst_ulong
Definition: c_api.h:27
XGProxyDMatrixCreate
XGB_DLL int XGProxyDMatrixCreate(DMatrixHandle *out)
Create a DMatrix proxy for setting data, can be freed by XGDMatrixFree.
XGDMatrixGetUIntInfo
XGB_DLL int XGDMatrixGetUIntInfo(const DMatrixHandle handle, const char *field, bst_ulong *out_len, const unsigned **out_dptr)
get uint32 info vector from matrix
XGBoosterSlice
XGB_DLL int XGBoosterSlice(BoosterHandle handle, int begin_layer, int end_layer, int step, BoosterHandle *out)
Slice a model using boosting index. The slice m:n indicates taking all trees that were fit during the...
XGDMatrixFree
XGB_DLL int XGDMatrixFree(DMatrixHandle handle)
free space in data matrix
XGBoosterSerializeToBuffer
XGB_DLL int XGBoosterSerializeToBuffer(BoosterHandle handle, bst_ulong *out_len, const char **out_dptr)
Memory snapshot based serialization method. Saves all state into the buffer.
XGDMatrixGetStrFeatureInfo
XGB_DLL int XGDMatrixGetStrFeatureInfo(DMatrixHandle handle, const char *field, bst_ulong *size, const char ***out_features)
Get string encoded information of all features.
XGDMatrixCreateFromMat
XGB_DLL int XGDMatrixCreateFromMat(const float *data, bst_ulong nrow, bst_ulong ncol, float missing, DMatrixHandle *out)
create matrix content from dense matrix
XGBoosterDumpModelExWithFeatures
XGB_DLL int XGBoosterDumpModelExWithFeatures(BoosterHandle handle, int fnum, const char **fname, const char **ftype, int with_stats, const char *format, bst_ulong *out_len, const char ***out_models)
dump model, return array of strings representing model dump
XGBoosterCreate
XGB_DLL int XGBoosterCreate(const DMatrixHandle dmats[], bst_ulong len, BoosterHandle *out)
create xgboost learner