7 #ifndef XGBOOST_COMMON_HIST_UTIL_H_ 8 #define XGBOOST_COMMON_HIST_UTIL_H_ 14 #include "../tree/param.h" 17 #include "../include/rabit/rabit.h" 34 T* ptr =
static_cast<T*
>(malloc(n*
sizeof(T)));
35 memcpy(ptr, ptr_, n_ *
sizeof(T));
78 const T*
end()
const {
97 std::vector<bst_float>
cut;
98 uint32_t GetBinIdx(
const Entry &e);
104 void Init(
DMatrix* p_fmat, uint32_t max_num_bins);
106 void Init(std::vector<WXQSketch>* sketchs, uint32_t max_num_bins);
109 size_t NumBins()
const {
return row_ptr.back(); }
112 virtual size_t SearchGroupIndFromBaseRow(
113 std::vector<bst_uint>
const& group_ptr,
size_t const base_rowid)
const;
121 const tree::TrainParam& param,
HistCutMatrix* hmat,
int gpu_batch_nrows);
144 void Init(
DMatrix* p_fmat,
int max_num_bins);
147 return {&index[0] + row_ptr[i],
149 row_ptr[i + 1] - row_ptr[i])};
152 auto nfeature = cut.
row_ptr.size() - 1;
153 for (
unsigned fid = 0; fid < nfeature; ++fid) {
154 auto ibegin = cut.
row_ptr[fid];
155 auto iend = cut.
row_ptr[fid + 1];
156 for (
auto i = ibegin; i < iend; ++i) {
157 counts[fid] += hit_count[i];
163 std::vector<size_t> hit_count_tloc_;
171 : row_ptr(row_ptr), index(index) {}
180 const tree::TrainParam& param);
183 return {blocks_[i].row_ptr_begin, blocks_[i].index_begin};
187 return blocks_.size();
191 std::vector<size_t> row_ptr_;
192 std::vector<uint32_t> index_;
195 const size_t* row_ptr_begin;
196 const size_t* row_ptr_end;
197 const uint32_t* index_begin;
198 const uint32_t* index_end;
200 std::vector<Block> blocks_;
218 constexpr uint32_t kMax = std::numeric_limits<uint32_t>::max();
219 CHECK_NE(row_ptr_[nid], kMax);
220 tree::GradStats* ptr =
221 const_cast<tree::GradStats*
>(dmlc::BeginPtr(data_) + row_ptr_[nid]);
222 return {ptr, nbins_};
227 const uint32_t k_max = std::numeric_limits<uint32_t>::max();
228 return (nid < row_ptr_.size() && row_ptr_[nid] != k_max);
240 constexpr uint32_t kMax = std::numeric_limits<uint32_t>::max();
241 if (nid >= row_ptr_.size()) {
242 row_ptr_.resize(nid + 1, kMax);
244 CHECK_EQ(row_ptr_[nid], kMax);
246 row_ptr_[nid] = data_.size();
247 data_.resize(data_.size() + nbins_);
254 std::vector<tree::GradStats> data_;
257 std::vector<size_t> row_ptr_;
266 inline void Init(
size_t nthread, uint32_t nbins) {
269 thread_init_.resize(nthread_);
273 void BuildHist(
const std::vector<GradientPair>& gpair,
278 void BuildBlockHist(
const std::vector<GradientPair>& gpair,
294 std::vector<size_t> thread_init_;
295 std::vector<tree::GradStats> data_;
301 #endif // XGBOOST_COMMON_HIST_UTIL_H_ void Init(uint32_t nbins)
Definition: hist_util.h:232
std::vector< bst_float > cut
the cut field
Definition: hist_util.h:97
size_t NumBins() const
Definition: hist_util.h:109
T * end()
Definition: hist_util.h:74
T back() const
Definition: hist_util.h:53
size_t GetNumBlock() const
Definition: hist_util.h:186
Definition: hist_util.h:166
void AddHistRow(bst_uint nid)
Definition: hist_util.h:239
detail::ptrdiff_t index_type
Definition: span.h:387
util to compute quantiles
The input data structure of xgboost.
Definition: hist_util.h:27
Internal data structure used by XGBoost during training. There are two ways to create a customized D...
Definition: data.h:406
std::vector< uint32_t > index
The index data.
Definition: hist_util.h:138
In-memory storage unit of sparse batch, stored in CSR format.
Definition: data.h:157
const T * begin() const
Definition: hist_util.h:70
void DeviceSketch(const SparsePage &batch, const MetaInfo &info, const tree::TrainParam &param, HistCutMatrix *hmat, int gpu_batch_nrows)
Builds the cut matrix on the GPU.
std::vector< uint32_t > row_ptr
Unit pointer to rows by element position.
Definition: hist_util.h:93
Cut configuration for all the features.
Definition: hist_util.h:91
std::vector< size_t > hit_count
hit count of each index
Definition: hist_util.h:140
~SimpleArray()
Definition: hist_util.h:28
Quantile sketch using WXQSummary.
Definition: quantile.h:839
span class implementation, based on ISO C++20 span<T>. The interface should be the same.
Definition: span.h:109
T * data()
Definition: hist_util.h:57
builder for histograms of gradient statistics
Definition: hist_util.h:263
GHistIndexRow operator[](size_t i) const
Definition: hist_util.h:146
GHistIndexBlock operator[](size_t i) const
Definition: hist_util.h:182
Quick Utility to compute subset of rows.
size_t size() const
Definition: hist_util.h:49
const size_t * row_ptr
Definition: hist_util.h:167
void resize(size_t n)
Definition: hist_util.h:33
const T * end() const
Definition: hist_util.h:78
histogram of gradient statistics for multiple nodes
Definition: hist_util.h:214
Definition: hist_util.h:176
HistCutMatrix cut
The corresponding cuts.
Definition: hist_util.h:142
GHistRow operator[](bst_uint nid) const
Definition: hist_util.h:217
T * begin()
Definition: hist_util.h:66
a collection of columns, with support for construction from GHistIndexMatrix.
Definition: column_matrix.h:64
const T * data() const
Definition: hist_util.h:61
namespace of xgboost
Definition: base.h:79
data structure to store an instance set, a subset of rows (instances) associated with a particular no...
Definition: row_set.h:23
Monitor monitor_
Definition: hist_util.h:115
Timing utility used to measure total method execution time over the lifetime of the containing object...
Definition: timer.h:49
void Init(size_t nthread, uint32_t nbins)
Definition: hist_util.h:266
std::vector< size_t > row_ptr
row pointer to rows by element position
Definition: hist_util.h:136
Element from a sparse vector.
Definition: data.h:132
T & operator[](size_t idx)
Definition: hist_util.h:41
uint32_t bst_uint
unsigned integer type used in boost, used for feature index and row index.
Definition: base.h:84
std::vector< bst_float > min_val
minimum value of each feature
Definition: hist_util.h:95
uint32_t GetNumBins()
Definition: hist_util.h:285
preprocessed global index matrix, in CSR format. Transform floating values to integer index in histogr...
Definition: hist_util.h:134
const uint32_t * index
Definition: hist_util.h:168
void GetFeatureCounts(size_t *counts) const
Definition: hist_util.h:151
bool RowExists(bst_uint nid) const
Definition: hist_util.h:226
GHistIndexBlock(const size_t *row_ptr, const uint32_t *index)
Definition: hist_util.h:170
T & operator[](size_t idx) const
Definition: hist_util.h:45