xgboost
c_api.h
Go to the documentation of this file.
1 
7 #ifndef XGBOOST_C_API_H_
8 #define XGBOOST_C_API_H_
9 
/*
 * XGB_EXTERN_C: linkage specifier used by the declarations in this header.
 * When the header is compiled as C++ it expands to extern "C" and the C++
 * wrappers <cstdio>/<cstdint> are included; when compiled as C it expands to
 * nothing and <stdio.h>/<stdint.h> are included instead.
 */
10 #ifdef __cplusplus
11 #define XGB_EXTERN_C extern "C"
12 #include <cstdio>
13 #include <cstdint>
14 #else
15 #define XGB_EXTERN_C
16 #include <stdio.h>
17 #include <stdint.h>
18 #endif // __cplusplus
19 
/*
 * XGB_DLL: decoration placed in front of every exported function declaration.
 * It always includes XGB_EXTERN_C. When _MSC_VER or _WIN32 is defined it adds
 * __declspec(dllexport); otherwise it adds the GCC-style
 * visibility("default") attribute.
 */
20 #if defined(_MSC_VER) || defined(_WIN32)
21 #define XGB_DLL XGB_EXTERN_C __declspec(dllexport)
22 #else
23 #define XGB_DLL XGB_EXTERN_C __attribute__ ((visibility ("default")))
24 #endif // defined(_MSC_VER) || defined(_WIN32)
25 
26 // manually define the unsigned long type used by this API as a fixed-width uint64_t
27 typedef uint64_t bst_ulong; // NOLINT(*)
28 
/*! \brief handle to DMatrix (an opaque void pointer) */
30 typedef void *DMatrixHandle; // NOLINT(*)
/*! \brief handle to Booster (an opaque void pointer) */
32 typedef void *BoosterHandle; // NOLINT(*)
33 
/*!
 * \brief Return the version of the XGBoost library being currently used.
 *
 * The function returns void and takes three int pointers, so the version
 * components are presumably written through major, minor and patch
 * (NOTE(review): the implementation is not visible here; confirm).
 */
43 XGB_DLL void XGBoostVersion(int* major, int* minor, int* patch);
44 
/*!
 * \brief get string message of the last error
 * \return pointer to a constant character string holding the message
 */
55 XGB_DLL const char *XGBGetLastError(void);
56 
/*!
 * \brief register callback function for LOG(INFO) messages -- helpful
 *        messages that are not errors.
 * \param callback function pointer taking a const char* and returning void
 */
64 XGB_DLL int XGBRegisterLogCallback(void (*callback)(const char*));
65 
/*!
 * \brief load a data matrix
 * \param fname file name, as a C string
 * \param silent integer flag (NOTE(review): presumably suppresses messages; confirm)
 * \param out pointer to a DMatrixHandle (presumably receives the created matrix; confirm)
 */
73 XGB_DLL int XGDMatrixCreateFromFile(const char *fname,
74  int silent,
75  DMatrixHandle *out);
76 
/*!
 * \brief create a matrix content from CSR format
 *
 * NOTE(review): parameter meanings below are inferred from the names only;
 * confirm against the implementation.
 * \param indptr presumably the row pointer array of the CSR matrix
 * \param indices presumably the column index of each stored element
 * \param data presumably the value of each stored element
 * \param nindptr presumably the length of indptr
 * \param nelem presumably the number of stored elements
 * \param num_col presumably the number of columns
 * \param out pointer to a DMatrixHandle (presumably receives the created matrix)
 */
88 XGB_DLL int XGDMatrixCreateFromCSREx(const size_t* indptr,
89  const unsigned* indices,
90  const float* data,
91  size_t nindptr,
92  size_t nelem,
93  size_t num_col,
94  DMatrixHandle* out);
/*!
 * \brief create a matrix content from CSC format
 *
 * NOTE(review): parameter meanings below are inferred from the names only;
 * confirm against the implementation.
 * \param col_ptr presumably the column pointer array of the CSC matrix
 * \param indices presumably the row index of each stored element
 * \param data presumably the value of each stored element
 * \param nindptr presumably the length of col_ptr
 * \param nelem presumably the number of stored elements
 * \param num_row presumably the number of rows
 * \param out pointer to a DMatrixHandle (presumably receives the created matrix)
 */
106 XGB_DLL int XGDMatrixCreateFromCSCEx(const size_t* col_ptr,
107  const unsigned* indices,
108  const float* data,
109  size_t nindptr,
110  size_t nelem,
111  size_t num_row,
112  DMatrixHandle* out);
113 
/*!
 * \brief create matrix content from dense matrix
 *
 * NOTE(review): parameter meanings below are inferred from the names only;
 * the memory layout of data is not visible here -- confirm.
 * \param data pointer to the float values of the dense matrix
 * \param nrow presumably the number of rows
 * \param ncol presumably the number of columns
 * \param missing presumably the value that marks a missing entry
 * \param out pointer to a DMatrixHandle (presumably receives the created matrix)
 */
123 XGB_DLL int XGDMatrixCreateFromMat(const float *data,
124  bst_ulong nrow,
125  bst_ulong ncol,
126  float missing,
127  DMatrixHandle *out);
/*!
 * \brief create matrix content from dense matrix
 *
 * Takes the same parameters as XGDMatrixCreateFromMat plus a trailing
 * nthread argument.
 * NOTE(review): nthread is presumably the number of threads to use; confirm.
 */
138 XGB_DLL int XGDMatrixCreateFromMat_omp(const float *data, // NOLINT
139  bst_ulong nrow, bst_ulong ncol,
140  float missing, DMatrixHandle *out,
141  int nthread);
/*!
 * \brief create matrix content from python data table
 *
 * NOTE(review): parameter meanings below are inferred from the names only;
 * confirm against the implementation.
 * \param data array of untyped pointers (presumably one per column)
 * \param feature_stypes array of C strings (presumably the storage type of each column)
 * \param nrow presumably the number of rows
 * \param ncol presumably the number of columns
 * \param out pointer to a DMatrixHandle (presumably receives the created matrix)
 * \param nthread presumably the number of threads to use
 */
152 XGB_DLL int XGDMatrixCreateFromDT(void** data,
153  const char ** feature_stypes,
154  bst_ulong nrow,
155  bst_ulong ncol,
156  DMatrixHandle* out,
157  int nthread);
158 
159 /*
160  * ========================== Begin data callback APIs =========================
161  *
162  * Short notes for data callback
163  *
164  * There are 2 sets of data callbacks for DMatrix. The first one is currently exclusively
165  * used by JVM packages. It uses `XGBoostBatchCSR` to accept batches for CSR formatted
166  * input, and concatenate them into 1 final big CSR. The related functions are:
167  *
168  * - XGBCallbackSetData
169  * - XGBCallbackDataIterNext
170  * - XGDMatrixCreateFromDataIter
171  *
172  * Another set is used by Quantile based DMatrix (used by hist algorithm) for reducing
173  * memory usage. Currently only GPU implementation is available. It accepts foreign data
174  * iterators as callbacks and works similar to external memory. For GPU Hist, the data is
175  * first compressed by quantile sketching then merged. This is particularly useful for
176  * distributed setting as it eliminates 2 copies of data. 1 by a `concat` from external
177  * library to make the data into a blob for normal DMatrix initialization, another by the
178  * internal CSR copy of DMatrix. Related functions are:
179  *
180  * - XGProxyDMatrixCreate
181  * - XGDMatrixCallbackNext
182  * - DataIterResetCallback
183  * - XGDeviceQuantileDMatrixSetDataCudaArrayInterface
184  * - XGDeviceQuantileDMatrixSetDataCudaColumnar
185  * - ... (data setters)
186  */
187 
188 /* ==== First set of callback functions, used exclusively by JVM packages. ==== */
189 
/*! \brief handle to an external data iterator (an opaque void pointer) */
191 typedef void *DataIterHandle; // NOLINT(*)
/*! \brief handle to an internal data holder (an opaque void pointer) */
193 typedef void *DataHolderHandle; // NOLINT(*)
194 
195 
197 typedef struct { // NOLINT(*)
199  size_t size;
200  /* \brief number of columns in the minibatch. */
201  size_t columns;
203 #ifdef __APPLE__
204  /* Necessary as Java on MacOS defines jlong as long int
205  * and gcc defines int64_t as long long int. */
206  long* offset; // NOLINT(*)
207 #else
208  int64_t* offset; // NOLINT(*)
209 #endif // __APPLE__
210 
211  float* label;
213  float* weight;
215  int* index;
217  float* value;
219 
/*!
 * \brief Callback to set the data to handle.
 *
 * Function type (not a function pointer type): takes a DataHolderHandle and
 * an XGBoostBatchCSR passed by value, and returns int.
 */
225 XGB_EXTERN_C typedef int XGBCallbackSetData( // NOLINT(*)
226  DataHolderHandle handle, XGBoostBatchCSR batch);
227 
/*!
 * \brief The data reading callback function.
 *
 * Function type taking the iterator handle, a pointer to an
 * XGBCallbackSetData function, and the DataHolderHandle to use with that
 * function; returns int.
 * NOTE(review): presumably the callback invokes set_function with
 * set_function_handle to hand over each batch; confirm with the caller.
 */
239 XGB_EXTERN_C typedef int XGBCallbackDataIterNext( // NOLINT(*)
240  DataIterHandle data_handle, XGBCallbackSetData *set_function,
241  DataHolderHandle set_function_handle);
242 
252  DataIterHandle data_handle,
253  XGBCallbackDataIterNext* callback,
254  const char* cache_info,
255  DMatrixHandle *out);
256 
257 /* == Second set of callback functions, used for constructing Quantile based DMatrix. ===
258  *
259  * Short note for how to use the second set of callback for GPU Hist tree method.
260  *
261  * Step 0: Define a data iterator with 2 methods `reset`, and `next`.
262  * Step 1: Create a DMatrix proxy by `XGProxyDMatrixCreate` and hold the handle.
263  * Step 2: Pass the iterator handle, proxy handle and 2 methods into
264  * `XGDeviceQuantileDMatrixCreateFromCallback`.
265  * Step 3: Call appropriate data setters in `next` functions.
266  *
267  * See test_iterative_device_dmatrix.cu or Python interface for examples.
268  */
269 
278 
/*!
 * \brief Callback function prototype for getting next batch of data.
 * \param iter handle to the external data iterator
 */
286 XGB_EXTERN_C typedef int XGDMatrixCallbackNext(DataIterHandle iter); // NOLINT(*)
287 
/*!
 * \brief Callback function prototype for resetting external iterator.
 * \param handle handle to the external data iterator
 */
291 XGB_EXTERN_C typedef void DataIterResetCallback(DataIterHandle handle); // NOLINT(*)
292 
308  DataIterHandle iter, DMatrixHandle proxy, DataIterResetCallback *reset,
309  XGDMatrixCallbackNext *next, float missing, int nthread, int max_bin,
310  DMatrixHandle *out);
321  DMatrixHandle handle,
322  const char* c_interface_str);
333  DMatrixHandle handle,
334  const char* c_interface_str);
335 /*
336  * ==========================- End data callback APIs ==========================
337  */
338 
339 
340 
350  const int *idxset,
351  bst_ulong len,
352  DMatrixHandle *out);
363  const int *idxset,
364  bst_ulong len,
365  DMatrixHandle *out,
366  int allow_groups);
380  const char *fname, int silent);
381 
390  char const* field,
391  char const* c_interface_str);
392 
402  const char *field,
403  const float *array,
404  bst_ulong len);
414  const char *field,
415  const unsigned *array,
416  bst_ulong len);
417 
/*!
 * \brief Set string encoded information of all features.
 * \param handle handle to the DMatrix
 * \param field name of the field, as a C string (accepted values are not visible here)
 * \param features array of C strings
 * \param size presumably the number of entries in features (NOTE(review): confirm)
 */
443 XGB_DLL int XGDMatrixSetStrFeatureInfo(DMatrixHandle handle, const char *field,
444  const char **features,
445  const bst_ulong size);
446 
/*!
 * \brief Get string encoded information of all features.
 * \param handle handle to the DMatrix
 * \param field name of the field, as a C string (accepted values are not visible here)
 * \param size pointer to a bst_ulong (presumably receives the number of strings; confirm)
 * \param out_features pointer to an array of C strings (presumably receives the result;
 *        NOTE(review): ownership and lifetime of the returned array are not visible here)
 */
482 XGB_DLL int XGDMatrixGetStrFeatureInfo(DMatrixHandle handle, const char *field,
483  bst_ulong *size,
484  const char ***out_features);
485 
494  const unsigned *group,
495  bst_ulong len);
496 
506  const char *field,
507  bst_ulong* out_len,
508  const float **out_dptr);
518  const char *field,
519  bst_ulong* out_len,
520  const unsigned **out_dptr);
528  bst_ulong *out);
536  bst_ulong *out);
537 // --- start XGBoost class
/*!
 * \brief create xgboost learner
 * \param dmats array of DMatrix handles
 * \param len presumably the number of entries in dmats (NOTE(review): confirm)
 * \param out pointer to a BoosterHandle (presumably receives the created booster)
 */
545 XGB_DLL int XGBoosterCreate(const DMatrixHandle dmats[],
546  bst_ulong len,
547  BoosterHandle *out);
554 
563  const char *name,
564  const char *value);
565 
572  bst_ulong *out);
573 
582  int iter,
583  DMatrixHandle dtrain);
595  DMatrixHandle dtrain,
596  float *grad,
597  float *hess,
598  bst_ulong len);
610  int iter,
611  DMatrixHandle dmats[],
612  const char *evnames[],
613  bst_ulong len,
614  const char **out_result);
639  DMatrixHandle dmat,
640  int option_mask,
641  unsigned ntree_limit,
642  int training,
643  bst_ulong *out_len,
644  const float **out_result);
645 
646 /*
647  * ========================== Begin Serialization APIs =========================
648  */
649 /*
650  * Short note for serialization APIs. There are 3 different sets of serialization API.
651  *
652  * - Functions with the term "Model" handle saving/loading XGBoost model like trees or
653  * linear weights, stripping out parameter configuration like training algorithms or
654  * CUDA device ID. These functions are designed to let users reuse the trained model
655  * for different tasks, examples are prediction, training continuation or model
656  * interpretation.
657  *
658  * - Functions with the term "Config" handle saving/loading configuration. They help users
659  * to study the internals of XGBoost. Users can also use the load method for specifying
660  * parameters in a structured way. These functions were introduced in 1.0.0, and are not
661  * yet stable.
662  *
663  * - Functions with the term "Serialization" combine the above two. They are used in
664  * situations like check-pointing, or continuing training task in distributed
665  * environment. In these cases the task must be carried out without any user
666  * intervention.
667  */
668 
676  const char *fname);
684  const char *fname);
693  const void *buf,
694  bst_ulong len);
704  const char **out_dptr);
705 
716  const char **out_dptr);
727  const void *buf, bst_ulong len);
728 
737  int* version);
738 
745 
746 
760  char const **out_str);
771  char const *json_parameters);
772 /*
773  * =========================== End Serialization APIs ==========================
774  */
775 
776 
787  const char *fmap,
788  int with_stats,
789  bst_ulong *out_len,
790  const char ***out_dump_array);
791 
803  const char *fmap,
804  int with_stats,
805  const char *format,
806  bst_ulong *out_len,
807  const char ***out_dump_array);
808 
821  int fnum,
822  const char **fname,
823  const char **ftype,
824  int with_stats,
825  bst_ulong *out_len,
826  const char ***out_models);
827 
841  int fnum,
842  const char **fname,
843  const char **ftype,
844  int with_stats,
845  const char *format,
846  bst_ulong *out_len,
847  const char ***out_models);
848 
858  const char* key,
859  const char** out,
860  int *success);
871  const char* key,
872  const char* value);
881  bst_ulong* out_len,
882  const char*** out);
883 #endif // XGBOOST_C_API_H_
void * BoosterHandle
handle to Booster
Definition: c_api.h:32
XGB_DLL int XGBoosterSetParam(BoosterHandle handle, const char *name, const char *value)
set parameters
XGB_DLL int XGBoosterGetModelRaw(BoosterHandle handle, bst_ulong *out_len, const char **out_dptr)
save model into binary raw bytes, return header of the array user must copy the result out...
#define XGB_DLL
Definition: c_api.h:23
int64_t * offset
row pointer to the rows in the data
Definition: c_api.h:208
void * DataIterHandle
handle to an external data iterator
Definition: c_api.h:191
XGB_DLL int XGBoosterGetNumFeature(BoosterHandle handle, bst_ulong *out)
get number of features
XGB_DLL int XGBoosterDumpModelEx(BoosterHandle handle, const char *fmap, int with_stats, const char *format, bst_ulong *out_len, const char ***out_dump_array)
dump model, return array of strings representing model dump
XGB_EXTERN_C typedef void DataIterResetCallback(DataIterHandle handle)
Callback function prototype for resetting external iterator.
XGB_DLL int XGBoosterDumpModel(BoosterHandle handle, const char *fmap, int with_stats, bst_ulong *out_len, const char ***out_dump_array)
dump model, return array of strings representing model dump
XGB_DLL int XGBoosterEvalOneIter(BoosterHandle handle, int iter, DMatrixHandle dmats[], const char *evnames[], bst_ulong len, const char **out_result)
get evaluation statistics for xgboost
XGB_DLL int XGDMatrixCreateFromDataIter(DataIterHandle data_handle, XGBCallbackDataIterNext *callback, const char *cache_info, DMatrixHandle *out)
Create a DMatrix from a data iterator.
void * DMatrixHandle
handle to DMatrix
Definition: c_api.h:30
XGB_DLL int XGBoosterFree(BoosterHandle handle)
free obj in handle
XGB_DLL int XGDMatrixSetUIntInfo(DMatrixHandle handle, const char *field, const unsigned *array, bst_ulong len)
set uint32 vector to a content in info
Mini batch used in XGBoost Data Iteration.
Definition: c_api.h:197
XGB_DLL int XGBoosterDumpModelWithFeatures(BoosterHandle handle, int fnum, const char **fname, const char **ftype, int with_stats, bst_ulong *out_len, const char ***out_models)
dump model, return array of strings representing model dump
XGB_DLL void XGBoostVersion(int *major, int *minor, int *patch)
Return the version of the XGBoost library being currently used.
XGB_DLL int XGDMatrixFree(DMatrixHandle handle)
free space in data matrix
XGB_DLL int XGDMatrixSetGroup(DMatrixHandle handle, const unsigned *group, bst_ulong len)
(deprecated) Use XGDMatrixSetUIntInfo instead. Set group of the training matrix
uint64_t bst_ulong
Definition: c_api.h:27
XGB_DLL int XGDMatrixCreateFromDT(void **data, const char **feature_stypes, bst_ulong nrow, bst_ulong ncol, DMatrixHandle *out, int nthread)
create matrix content from python data table
size_t size
number of rows in the minibatch
Definition: c_api.h:199
void * DataHolderHandle
handle to an internal data holder.
Definition: c_api.h:193
XGB_EXTERN_C typedef int XGBCallbackSetData(DataHolderHandle handle, XGBoostBatchCSR batch)
Callback to set the data to handle.
XGB_DLL int XGDMatrixSetFloatInfo(DMatrixHandle handle, const char *field, const float *array, bst_ulong len)
set float vector to a content in info
#define XGB_EXTERN_C
Definition: c_api.h:15
XGB_DLL int XGBoosterLoadModel(BoosterHandle handle, const char *fname)
Load model from existing file.
XGB_DLL int XGDMatrixSetStrFeatureInfo(DMatrixHandle handle, const char *field, const char **features, const bst_ulong size)
Set string encoded information of all features.
XGB_DLL int XGBoosterLoadJsonConfig(BoosterHandle handle, char const *json_parameters)
Load XGBoost's internal configuration from a JSON document. Currently the support is experimental...
XGB_DLL int XGDMatrixCreateFromFile(const char *fname, int silent, DMatrixHandle *out)
load a data matrix
XGB_DLL int XGBoosterUpdateOneIter(BoosterHandle handle, int iter, DMatrixHandle dtrain)
update the model in one round using dtrain
XGB_DLL int XGBoosterGetAttr(BoosterHandle handle, const char *key, const char **out, int *success)
Get string attribute from Booster.
XGB_DLL int XGDMatrixCreateFromCSREx(const size_t *indptr, const unsigned *indices, const float *data, size_t nindptr, size_t nelem, size_t num_col, DMatrixHandle *out)
create a matrix content from CSR format
XGB_DLL int XGBoosterSaveJsonConfig(BoosterHandle handle, bst_ulong *out_len, char const **out_str)
Save XGBoost's internal configuration into a JSON document. Currently the support is experimental...
float * weight
weight of each instance, can be NULL
Definition: c_api.h:213
XGB_DLL int XGDMatrixSaveBinary(DMatrixHandle handle, const char *fname, int silent)
save a data matrix into a binary file
XGB_DLL int XGProxyDMatrixCreate(DMatrixHandle *out)
Create a DMatrix proxy for setting data, can be freed by XGDMatrixFree.
XGB_DLL int XGDMatrixCreateFromCSCEx(const size_t *col_ptr, const unsigned *indices, const float *data, size_t nindptr, size_t nelem, size_t num_row, DMatrixHandle *out)
create a matrix content from CSC format
XGB_DLL int XGDMatrixGetUIntInfo(const DMatrixHandle handle, const char *field, bst_ulong *out_len, const unsigned **out_dptr)
get uint32 info vector from matrix
XGB_DLL const char * XGBGetLastError(void)
get string message of the last error
XGB_DLL int XGBoosterSaveModel(BoosterHandle handle, const char *fname)
Save model into existing file.
XGB_DLL int XGDMatrixGetStrFeatureInfo(DMatrixHandle handle, const char *field, bst_ulong *size, const char ***out_features)
Get string encoded information of all features.
XGB_DLL int XGBoosterSetAttr(BoosterHandle handle, const char *key, const char *value)
Set or delete string attribute.
XGB_DLL int XGBoosterCreate(const DMatrixHandle dmats[], bst_ulong len, BoosterHandle *out)
create xgboost learner
float * label
labels of each instance
Definition: c_api.h:211
XGB_DLL int XGDMatrixSliceDMatrix(DMatrixHandle handle, const int *idxset, bst_ulong len, DMatrixHandle *out)
create a new dmatrix from sliced content of existing matrix
float * value
feature values
Definition: c_api.h:217
XGB_DLL int XGDMatrixGetFloatInfo(const DMatrixHandle handle, const char *field, bst_ulong *out_len, const float **out_dptr)
get float info vector from matrix.
XGB_DLL int XGBoosterSerializeToBuffer(BoosterHandle handle, bst_ulong *out_len, const char **out_dptr)
Memory snapshot based serialization method. Saves all state into a buffer.
XGB_DLL int XGDMatrixCreateFromMat(const float *data, bst_ulong nrow, bst_ulong ncol, float missing, DMatrixHandle *out)
create matrix content from dense matrix
XGB_DLL int XGBRegisterLogCallback(void(*callback)(const char *))
register callback function for LOG(INFO) messages – helpful messages that are not errors...
XGB_DLL int XGBoosterDumpModelExWithFeatures(BoosterHandle handle, int fnum, const char **fname, const char **ftype, int with_stats, const char *format, bst_ulong *out_len, const char ***out_models)
dump model, return array of strings representing model dump
XGB_DLL int XGBoosterLoadRabitCheckpoint(BoosterHandle handle, int *version)
Initialize the booster from rabit checkpoint. This is used in distributed training API...
XGB_EXTERN_C typedef int XGDMatrixCallbackNext(DataIterHandle iter)
Callback function prototype for getting next batch of data.
XGB_DLL int XGDeviceQuantileDMatrixSetDataCudaArrayInterface(DMatrixHandle handle, const char *c_interface_str)
Set data on a DMatrix proxy.
XGB_DLL int XGBoosterPredict(BoosterHandle handle, DMatrixHandle dmat, int option_mask, unsigned ntree_limit, int training, bst_ulong *out_len, const float **out_result)
make prediction based on dmat
XGB_DLL int XGBoosterUnserializeFromBuffer(BoosterHandle handle, const void *buf, bst_ulong len)
Memory snapshot based serialization method. Loads the buffer returned from `XGBoosterSerializeToBuffe...
XGB_DLL int XGDMatrixNumRow(DMatrixHandle handle, bst_ulong *out)
get number of rows.
XGB_DLL int XGDMatrixSetInfoFromInterface(DMatrixHandle handle, char const *field, char const *c_interface_str)
Set content in array interface to a content in info.
XGB_DLL int XGBoosterLoadModelFromBuffer(BoosterHandle handle, const void *buf, bst_ulong len)
load model from in memory buffer
XGB_DLL int XGDMatrixNumCol(DMatrixHandle handle, bst_ulong *out)
get number of columns
XGB_DLL int XGDMatrixSliceDMatrixEx(DMatrixHandle handle, const int *idxset, bst_ulong len, DMatrixHandle *out, int allow_groups)
create a new dmatrix from sliced content of existing matrix
int * index
feature index
Definition: c_api.h:215
XGB_EXTERN_C typedef int XGBCallbackDataIterNext(DataIterHandle data_handle, XGBCallbackSetData *set_function, DataHolderHandle set_function_handle)
The data reading callback function. The iterator will be able to give subset of batch in the data...
size_t columns
Definition: c_api.h:201
XGB_DLL int XGBoosterSaveRabitCheckpoint(BoosterHandle handle)
Save the current checkpoint to rabit.
XGB_DLL int XGBoosterGetAttrNames(BoosterHandle handle, bst_ulong *out_len, const char ***out)
Get the names of all attributes from Booster.
XGB_DLL int XGDeviceQuantileDMatrixCreateFromCallback(DataIterHandle iter, DMatrixHandle proxy, DataIterResetCallback *reset, XGDMatrixCallbackNext *next, float missing, int nthread, int max_bin, DMatrixHandle *out)
Create a device DMatrix with data iterator.
XGB_DLL int XGDMatrixCreateFromMat_omp(const float *data, bst_ulong nrow, bst_ulong ncol, float missing, DMatrixHandle *out, int nthread)
create matrix content from dense matrix
XGB_DLL int XGDeviceQuantileDMatrixSetDataCudaColumnar(DMatrixHandle handle, const char *c_interface_str)
Set data on a DMatrix proxy.
XGB_DLL int XGBoosterBoostOneIter(BoosterHandle handle, DMatrixHandle dtrain, float *grad, float *hess, bst_ulong len)
update the model by directly specifying the gradient and second order gradient; this can be used to replace...