hist_util.h
#ifndef XGBOOST_COMMON_HIST_UTIL_H_
#define XGBOOST_COMMON_HIST_UTIL_H_

#include <xgboost/data.h>
#include <xgboost/generic_parameters.h>
#include <limits>
#include <vector>
#include <algorithm>
#include <memory>
#include <utility>
#include <map>

#include "row_set.h"
#include "common.h"
#include "threading_utils.h"
#include "../tree/param.h"
#include "./quantile.h"
#include "./timer.h"
#include "../include/rabit/rabit.h"

namespace xgboost {
class GHistIndexMatrix;

namespace common {

// A CSC matrix representing histogram cuts, used in CPU quantile hist.
// The cut values represent upper bounds of bins containing approximately equal
// numbers of elements.
class HistogramCuts {
 protected:
  using BinIdx = uint32_t;

 public:
  HostDeviceVector<bst_float> cut_values_;
  HostDeviceVector<uint32_t> cut_ptrs_;
  // storing minimum value in a sketch set.
  HostDeviceVector<float> min_vals_;

  HistogramCuts();
  HistogramCuts(HistogramCuts const& that) {
    cut_values_.Resize(that.cut_values_.Size());
    cut_ptrs_.Resize(that.cut_ptrs_.Size());
    min_vals_.Resize(that.min_vals_.Size());
    cut_values_.Copy(that.cut_values_);
    cut_ptrs_.Copy(that.cut_ptrs_);
    min_vals_.Copy(that.min_vals_);
  }

  HistogramCuts(HistogramCuts&& that) noexcept(true) {
    *this = std::forward<HistogramCuts&&>(that);
  }

  HistogramCuts& operator=(HistogramCuts const& that) {
    cut_values_.Resize(that.cut_values_.Size());
    cut_ptrs_.Resize(that.cut_ptrs_.Size());
    min_vals_.Resize(that.min_vals_.Size());
    cut_values_.Copy(that.cut_values_);
    cut_ptrs_.Copy(that.cut_ptrs_);
    min_vals_.Copy(that.min_vals_);
    return *this;
  }

  HistogramCuts& operator=(HistogramCuts&& that) noexcept(true) {
    cut_ptrs_ = std::move(that.cut_ptrs_);
    cut_values_ = std::move(that.cut_values_);
    min_vals_ = std::move(that.min_vals_);
    return *this;
  }

  uint32_t FeatureBins(uint32_t feature) const {
    return cut_ptrs_.ConstHostVector().at(feature + 1) -
           cut_ptrs_.ConstHostVector()[feature];
  }

  // Getters.  Cuts should be of no use after building histogram indices, but currently
  // they are deeply linked with quantile_hist, gpu sketcher and gpu_hist, so we preserve
  // these for now.
  std::vector<uint32_t> const& Ptrs() const { return cut_ptrs_.ConstHostVector(); }
  std::vector<float> const& Values() const { return cut_values_.ConstHostVector(); }
  std::vector<float> const& MinValues() const { return min_vals_.ConstHostVector(); }

  size_t TotalBins() const { return cut_ptrs_.ConstHostVector().back(); }

  // Return the index of a cut point that is strictly greater than the input
  // value, or the last available index if none exists.
  BinIdx SearchBin(float value, uint32_t column_id) const {
    auto beg = cut_ptrs_.ConstHostVector().at(column_id);
    auto end = cut_ptrs_.ConstHostVector().at(column_id + 1);
    const auto& values = cut_values_.ConstHostVector();
    auto it = std::upper_bound(values.cbegin() + beg, values.cbegin() + end, value);
    BinIdx idx = it - values.cbegin();
    if (idx == end) {
      idx -= 1;
    }
    return idx;
  }

  BinIdx SearchBin(Entry const& e) const {
    return SearchBin(e.fvalue, e.index);
  }
};
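
// Illustrative sketch (editor's addition, not part of the upstream header): mapping a
// raw feature value to a global bin index.  `LookupBinExample` is a hypothetical helper
// used only for exposition.
inline uint32_t LookupBinExample(HistogramCuts const& cuts, Entry const& e) {
  // The bins of feature e.index occupy the contiguous range
  // [cuts.Ptrs()[e.index], cuts.Ptrs()[e.index + 1]) within cuts.Values().
  uint32_t first_bin = cuts.Ptrs()[e.index];
  uint32_t last_bin = first_bin + cuts.FeatureBins(e.index) - 1;
  // SearchBin() returns the first cut strictly greater than e.fvalue, clamped to the
  // feature's last bin when the value exceeds every cut.
  uint32_t bin = cuts.SearchBin(e);
  CHECK_GE(bin, first_bin);
  CHECK_LE(bin, last_bin);
  return bin;
}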

inline HistogramCuts SketchOnDMatrix(DMatrix *m, int32_t max_bins,
                                     Span<float> const hessian = {}) {
  HistogramCuts out;
  auto const& info = m->Info();
  const auto threads = omp_get_max_threads();
  std::vector<std::vector<bst_row_t>> column_sizes(threads);
  for (auto& column : column_sizes) {
    column.resize(info.num_col_, 0);
  }
  std::vector<bst_row_t> reduced(info.num_col_, 0);
  for (auto const& page : m->GetBatches<SparsePage>()) {
    auto const& entries_per_column =
        HostSketchContainer::CalcColumnSize(page, info.num_col_, threads);
    for (size_t i = 0; i < entries_per_column.size(); ++i) {
      reduced[i] += entries_per_column[i];
    }
  }
  HostSketchContainer container(reduced, max_bins, info.feature_types.ConstHostSpan(),
                                HostSketchContainer::UseGroup(info), threads);
  for (auto const& page : m->GetBatches<SparsePage>()) {
    container.PushRowPage(page, info, hessian);
  }
  container.MakeCuts(&out);
  return out;
}
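
// Illustrative sketch (editor's addition): typical use of SketchOnDMatrix when building
// the quantised index on CPU.  `BuildCutsExample` and `max_bins` are hypothetical names
// used only for exposition; `dmat` is assumed to be a valid DMatrix.
inline HistogramCuts BuildCutsExample(DMatrix* dmat, int32_t max_bins = 256) {
  // Without Hessian values every sample gets equal weight in the sketch; passing the
  // per-row Hessian makes the quantiles weighted instead.
  HistogramCuts cuts = SketchOnDMatrix(dmat, max_bins);
  CHECK_EQ(cuts.Ptrs().size(), dmat->Info().num_col_ + 1);
  return cuts;
}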

enum BinTypeSize : uint32_t {
  kUint8BinsTypeSize = 1,
  kUint16BinsTypeSize = 2,
  kUint32BinsTypeSize = 4
};

struct Index {
  Index() {
    SetBinTypeSize(binTypeSize_);
  }
  Index(const Index& i) = delete;
  Index& operator=(Index i) = delete;
  Index(Index&& i) = delete;
  Index& operator=(Index&& i) = delete;
  uint32_t operator[](size_t i) const {
    if (offset_ptr_ != nullptr) {
      return func_(data_ptr_, i) + offset_ptr_[i % p_];
    } else {
      return func_(data_ptr_, i);
    }
  }
  void SetBinTypeSize(BinTypeSize binTypeSize) {
    binTypeSize_ = binTypeSize;
    switch (binTypeSize) {
      case kUint8BinsTypeSize:
        func_ = &GetValueFromUint8;
        break;
      case kUint16BinsTypeSize:
        func_ = &GetValueFromUint16;
        break;
      case kUint32BinsTypeSize:
        func_ = &GetValueFromUint32;
        break;
      default:
        CHECK(binTypeSize == kUint8BinsTypeSize ||
              binTypeSize == kUint16BinsTypeSize ||
              binTypeSize == kUint32BinsTypeSize);
    }
  }
  BinTypeSize GetBinTypeSize() const {
    return binTypeSize_;
  }
  template<typename T>
  T* data() const {  // NOLINT
    return static_cast<T*>(data_ptr_);
  }
  uint32_t* Offset() const {
    return offset_ptr_;
  }
  size_t OffsetSize() const {
    return offset_.size();
  }
  size_t Size() const {
    return data_.size() / (binTypeSize_);
  }
  void Resize(const size_t nBytesData) {
    data_.resize(nBytesData);
    data_ptr_ = reinterpret_cast<void*>(data_.data());
  }
  void ResizeOffset(const size_t nDisps) {
    offset_.resize(nDisps);
    offset_ptr_ = offset_.data();
    p_ = nDisps;
  }
  std::vector<uint8_t>::const_iterator begin() const {  // NOLINT
    return data_.begin();
  }
  std::vector<uint8_t>::const_iterator end() const {  // NOLINT
    return data_.end();
  }

  std::vector<uint8_t>::iterator begin() {  // NOLINT
    return data_.begin();
  }
  std::vector<uint8_t>::iterator end() {  // NOLINT
    return data_.end();
  }

 private:
  static uint32_t GetValueFromUint8(void* t, size_t i) {
    return reinterpret_cast<uint8_t*>(t)[i];
  }
  static uint32_t GetValueFromUint16(void* t, size_t i) {
    return reinterpret_cast<uint16_t*>(t)[i];
  }
  static uint32_t GetValueFromUint32(void* t, size_t i) {
    return reinterpret_cast<uint32_t*>(t)[i];
  }

  using Func = uint32_t (*)(void*, size_t);

  std::vector<uint8_t> data_;
  std::vector<uint32_t> offset_;  // size of this field is equal to number of features
  void* data_ptr_;
  BinTypeSize binTypeSize_ {kUint8BinsTypeSize};
  size_t p_ {1};
  uint32_t* offset_ptr_ {nullptr};
  Func func_;
};
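
// Illustrative sketch (editor's addition): how Index stores bin indices compactly.  When
// all features have few bins, one byte per entry suffices; operator[] widens the stored
// value back to uint32_t and, for dense data, adds the per-feature offset.  The names
// below are hypothetical and used only for exposition.
inline void FillIndexExample(Index* index, std::vector<uint32_t> const& bins) {
  index->SetBinTypeSize(kUint8BinsTypeSize);     // one byte per stored bin index
  index->Resize(bins.size() * sizeof(uint8_t));  // Resize() takes a size in bytes
  uint8_t* out = index->data<uint8_t>();
  for (size_t i = 0; i < bins.size(); ++i) {
    out[i] = static_cast<uint8_t>(bins[i]);      // assumes every value fits in 8 bits
  }
  CHECK_EQ(index->Size(), bins.size());
  if (!bins.empty()) {
    CHECK_EQ((*index)[0], bins[0]);  // values are read back widened to uint32_t
  }
}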

template <typename GradientIndex>
int32_t XGBOOST_HOST_DEV_INLINE BinarySearchBin(size_t begin, size_t end,
                                                GradientIndex const& data,
                                                uint32_t const fidx_begin,
                                                uint32_t const fidx_end) {
  size_t previous_middle = std::numeric_limits<size_t>::max();
  while (end != begin) {
    size_t middle = begin + (end - begin) / 2;
    if (middle == previous_middle) {
      break;
    }
    previous_middle = middle;

    auto gidx = data[middle];

    if (gidx >= fidx_begin && gidx < fidx_end) {
      return static_cast<int32_t>(gidx);
    } else if (gidx < fidx_begin) {
      begin = middle;
    } else {
      end = middle;
    }
  }
  // Value is missing
  return -1;
}
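
// Illustrative sketch (editor's addition): BinarySearchBin searches one row of a
// gradient index (bin ids sorted in ascending order, since each feature owns a
// contiguous bin range) for the bin that belongs to a given feature, returning -1
// when the feature is missing in that row.  The data below is hypothetical.
inline void BinarySearchBinExample() {
  // Row with entries in three features; assume feature 2 owns global bins [16, 32).
  std::vector<uint32_t> row_bins{3, 17, 70};
  int32_t found = BinarySearchBin(0, row_bins.size(), row_bins, 16, 32);
  CHECK_EQ(found, 17);    // the bin recorded for feature 2
  int32_t missing = BinarySearchBin(0, row_bins.size(), row_bins, 32, 48);
  CHECK_EQ(missing, -1);  // a feature owning bins [32, 48) is absent from this row
}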

class ColumnMatrix;

template<typename GradientSumT>
using GHistRow = Span<xgboost::detail::GradientPairInternal<GradientSumT> >;

/*!
 * \brief fill a histogram by zeros
 */
template<typename GradientSumT>
void InitilizeHistByZeroes(GHistRow<GradientSumT> hist, size_t begin, size_t end);

/*!
 * \brief Increment hist as dst += add in range [begin, end)
 */
template<typename GradientSumT>
void IncrementHist(GHistRow<GradientSumT> dst, const GHistRow<GradientSumT> add,
                   size_t begin, size_t end);

/*!
 * \brief Copy hist from src to dst in range [begin, end)
 */
template<typename GradientSumT>
void CopyHist(GHistRow<GradientSumT> dst, const GHistRow<GradientSumT> src,
              size_t begin, size_t end);

/*!
 * \brief Compute Subtraction: dst = src1 - src2 in range [begin, end)
 */
template<typename GradientSumT>
void SubtractionHist(GHistRow<GradientSumT> dst, const GHistRow<GradientSumT> src1,
                     const GHistRow<GradientSumT> src2,
                     size_t begin, size_t end);

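
// Illustrative sketch (editor's addition): the relationship the helpers above maintain.
// A child histogram can be obtained either by accumulating its rows or, more cheaply,
// by subtracting the sibling from the parent.  `ApplySubtractionTrickExample` is a
// hypothetical name used only for exposition.
template <typename GradientSumT>
void ApplySubtractionTrickExample(GHistRow<GradientSumT> sibling,
                                  GHistRow<GradientSumT> parent,
                                  GHistRow<GradientSumT> larger_child) {
  // sibling = parent - larger_child over every bin, so only the smaller child has to be
  // built from raw gradients.
  SubtractionHist(sibling, parent, larger_child, 0, parent.size());
}
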
/*!
 * \brief histogram of gradient statistics for multiple nodes
 */
template<typename GradientSumT>
class HistCollection {
 public:
  using GHistRowT = GHistRow<GradientSumT>;
  using GradientPairT = xgboost::detail::GradientPairInternal<GradientSumT>;

  // access histogram for the i-th node
  GHistRowT operator[](bst_uint nid) const {
    constexpr uint32_t kMax = std::numeric_limits<uint32_t>::max();
    const size_t id = row_ptr_.at(nid);
    CHECK_NE(id, kMax);
    GradientPairT* ptr = nullptr;
    if (contiguous_allocation_) {
      ptr = const_cast<GradientPairT*>(data_[0].data() + nbins_ * id);
    } else {
      ptr = const_cast<GradientPairT*>(data_[id].data());
    }
    return {ptr, nbins_};
  }

  // have we computed a histogram for the i-th node?
  bool RowExists(bst_uint nid) const {
    const uint32_t k_max = std::numeric_limits<uint32_t>::max();
    return (nid < row_ptr_.size() && row_ptr_[nid] != k_max);
  }

  // initialize histogram collection
  void Init(uint32_t nbins) {
    if (nbins_ != nbins) {
      nbins_ = nbins;
      // quite expensive operation, so let's do this only once
      data_.clear();
    }
    row_ptr_.clear();
    n_nodes_added_ = 0;
  }

  // create an empty histogram for the i-th node
  void AddHistRow(bst_uint nid) {
    constexpr uint32_t kMax = std::numeric_limits<uint32_t>::max();
    if (nid >= row_ptr_.size()) {
      row_ptr_.resize(nid + 1, kMax);
    }
    CHECK_EQ(row_ptr_[nid], kMax);

    if (data_.size() < (nid + 1)) {
      data_.resize((nid + 1));
    }

    row_ptr_[nid] = n_nodes_added_;
    n_nodes_added_++;
  }
  // allocate thread-local memory for the i-th node
  void AllocateData(bst_uint nid) {
    if (data_[row_ptr_[nid]].size() == 0) {
      data_[row_ptr_[nid]].resize(nbins_, {0, 0});
    }
  }
  // allocate a common buffer contiguously for all nodes; needed for a single Allreduce call
  void AllocateAllData() {
    const size_t new_size = nbins_ * data_.size();
    contiguous_allocation_ = true;
    if (data_[0].size() != new_size) {
      data_[0].resize(new_size);
    }
  }

 private:
  /*! \brief number of bins in each histogram */
  uint32_t nbins_ = 0;
  /*! \brief number of histograms (nodes) added so far */
  uint32_t n_nodes_added_ = 0;
  /*! \brief whether all histograms share one contiguous buffer */
  bool contiguous_allocation_ = false;

  std::vector<std::vector<GradientPairT>> data_;

  /*! \brief row_ptr_[nid] is the position of node nid's histogram within data_ */
  std::vector<size_t> row_ptr_;
};
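
// Illustrative sketch (editor's addition): the calling sequence HistCollection expects.
// Rows are registered first, memory is allocated lazily, and operator[] hands back a
// span over the node's bins.  Names are hypothetical and used only for exposition.
inline void HistCollectionExample(uint32_t n_bins) {
  HistCollection<double> collection;
  collection.Init(n_bins);     // one histogram holds n_bins gradient pairs
  collection.AddHistRow(0);    // register node 0 (no allocation yet)
  collection.AllocateData(0);  // allocate and zero-initialise its buffer
  CHECK(collection.RowExists(0));
  GHistRow<double> hist = collection[0];
  CHECK_EQ(hist.size(), n_bins);
}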

/*!
 * \brief Stores temporary histograms to compute them in parallel.
 *        Supports processing multiple tree-nodes for nested parallelism.
 */
template<typename GradientSumT>
class ParallelGHistBuilder {
 public:
  using GHistRowT = GHistRow<GradientSumT>;

  void Init(size_t nbins) {
    if (nbins != nbins_) {
      hist_buffer_.Init(nbins);
      nbins_ = nbins;
    }
  }

  // Add new elements if needed, mark all hists as unused.
  // targeted_hists - already allocated hists which should contain final results after ReduceHist() calls
  void Reset(size_t nthreads, size_t nodes, const BlockedSpace2d& space,
             const std::vector<GHistRowT>& targeted_hists) {
    hist_buffer_.Init(nbins_);
    tid_nid_to_hist_.clear();
    threads_to_nids_map_.clear();

    targeted_hists_ = targeted_hists;

    CHECK_EQ(nodes, targeted_hists.size());

    nodes_ = nodes;
    nthreads_ = nthreads;

    MatchThreadsToNodes(space);
    AllocateAdditionalHistograms();
    MatchNodeNidPairToHist();

    hist_was_used_.resize(nthreads * nodes_);
    std::fill(hist_was_used_.begin(), hist_was_used_.end(), static_cast<int>(false));
  }

  // Get the specified hist; initialize it with zeros if it wasn't used before
  GHistRowT GetInitializedHist(size_t tid, size_t nid) {
    CHECK_LT(nid, nodes_);
    CHECK_LT(tid, nthreads_);

    int idx = tid_nid_to_hist_.at({tid, nid});
    if (idx >= 0) {
      hist_buffer_.AllocateData(idx);
    }
    GHistRowT hist = idx == -1 ? targeted_hists_[nid] : hist_buffer_[idx];

    if (!hist_was_used_[tid * nodes_ + nid]) {
      InitilizeHistByZeroes(hist, 0, hist.size());
      hist_was_used_[tid * nodes_ + nid] = static_cast<int>(true);
    }

    return hist;
  }

  // Reduce bins [begin, end) for nid-node into dst across threads
  void ReduceHist(size_t nid, size_t begin, size_t end) {
    CHECK_GT(end, begin);
    CHECK_LT(nid, nodes_);

    GHistRowT dst = targeted_hists_[nid];

    bool is_updated = false;
    for (size_t tid = 0; tid < nthreads_; ++tid) {
      if (hist_was_used_[tid * nodes_ + nid]) {
        is_updated = true;

        int idx = tid_nid_to_hist_.at({tid, nid});
        GHistRowT src = idx == -1 ? targeted_hists_[nid] : hist_buffer_[idx];

        if (dst.data() != src.data()) {
          IncrementHist(dst, src, begin, end);
        }
      }
    }
    if (!is_updated) {
      // In distributed mode some tree nodes can be empty on local machines,
      // so in that case we just fill the local hist with zeros.
      InitilizeHistByZeroes(dst, begin, end);
    }
  }

 protected:
  void MatchThreadsToNodes(const BlockedSpace2d& space) {
    const size_t space_size = space.Size();
    const size_t chunck_size = space_size / nthreads_ + !!(space_size % nthreads_);

    threads_to_nids_map_.resize(nthreads_ * nodes_, false);

    for (size_t tid = 0; tid < nthreads_; ++tid) {
      size_t begin = chunck_size * tid;
      size_t end = std::min(begin + chunck_size, space_size);

      if (begin < space_size) {
        size_t nid_begin = space.GetFirstDimension(begin);
        size_t nid_end = space.GetFirstDimension(end - 1);

        for (size_t nid = nid_begin; nid <= nid_end; ++nid) {
          // true means thread 'tid' will compute a partial hist for node 'nid'
          threads_to_nids_map_[tid * nodes_ + nid] = true;
        }
      }
    }
  }

  void AllocateAdditionalHistograms() {
    size_t hist_allocated_additionally = 0;

    for (size_t nid = 0; nid < nodes_; ++nid) {
      int nthreads_for_nid = 0;

      for (size_t tid = 0; tid < nthreads_; ++tid) {
        if (threads_to_nids_map_[tid * nodes_ + nid]) {
          nthreads_for_nid++;
        }
      }

      // In distributed mode some tree nodes can be empty on local machines,
      // in which case nthreads_for_nid is 0. Otherwise, allocate additional
      // (nthreads_for_nid - 1) histograms, because one is already allocated
      // externally (it will store the final result for the node).
      hist_allocated_additionally += std::max<int>(0, nthreads_for_nid - 1);
    }

    for (size_t i = 0; i < hist_allocated_additionally; ++i) {
      hist_buffer_.AddHistRow(i);
    }
  }

  void MatchNodeNidPairToHist() {
    size_t hist_allocated_additionally = 0;

    for (size_t nid = 0; nid < nodes_; ++nid) {
      bool first_hist = true;
      for (size_t tid = 0; tid < nthreads_; ++tid) {
        if (threads_to_nids_map_[tid * nodes_ + nid]) {
          if (first_hist) {
            tid_nid_to_hist_[{tid, nid}] = -1;
            first_hist = false;
          } else {
            tid_nid_to_hist_[{tid, nid}] = hist_allocated_additionally++;
          }
        }
      }
    }
  }

  /*! \brief number of bins in each histogram */
  size_t nbins_ = 0;
  /*! \brief number of threads for parallel computation */
  size_t nthreads_ = 0;
  /*! \brief number of nodes which will be processed in parallel */
  size_t nodes_ = 0;
  /*! \brief buffer of additional histograms for parallel processing */
  HistCollection<GradientSumT> hist_buffer_;
  /*!
   * \brief marks which hists were used, meaning they should be merged.
   * Contains only {true, false} values, but 'int' is used instead of 'bool'
   * because std::vector<bool> is not thread-safe.
   */
  std::vector<int> hist_was_used_;

  /*! \brief marks which nodes each thread will compute a partial hist for */
  std::vector<bool> threads_to_nids_map_;
  /*! \brief contains histograms for the final results */
  std::vector<GHistRowT> targeted_hists_;
  /*!
   * \brief maps the pair {tid, nid} to the index of an allocated histogram from
   * hist_buffer_ and targeted_hists_; -1 means the thread writes directly into
   * targeted_hists_[nid].
   */
  std::map<std::pair<size_t, size_t>, int> tid_nid_to_hist_;
};
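
// Illustrative sketch (editor's addition): the per-level flow ParallelGHistBuilder is
// built for.  Each thread accumulates into its own partial histogram, and ReduceHist()
// folds the partials into the externally allocated target.  This simplified single-node
// walk-through is hypothetical; it assumes `space` assigns at least one chunk of work to
// every thread, and a real caller would drive the loop with ParallelFor2d.
template <typename GradientSumT>
void ParallelBuildExample(ParallelGHistBuilder<GradientSumT>* buffer,
                          const BlockedSpace2d& space,
                          GHistRow<GradientSumT> target, size_t n_threads) {
  buffer->Init(target.size());
  buffer->Reset(n_threads, /*nodes=*/1, space, {target});
  for (size_t tid = 0; tid < n_threads; ++tid) {
    // Zero-initialised on first use; either a thread-local copy or `target` itself.
    GHistRow<GradientSumT> local = buffer->GetInitializedHist(tid, /*nid=*/0);
    (void)local;  // a real builder would accumulate gradient pairs into `local` here
  }
  buffer->ReduceHist(/*nid=*/0, 0, target.size());  // `target` now holds the reduction
}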

/*!
 * \brief builder for histograms of gradient statistics
 */
template<typename GradientSumT>
class GHistBuilder {
 public:
  using GHistRowT = GHistRow<GradientSumT>;

  GHistBuilder() = default;
  GHistBuilder(size_t nthread, uint32_t nbins) : nthread_{nthread}, nbins_{nbins} {}

  // construct a histogram via histogram aggregation
  template <bool any_missing>
  void BuildHist(const std::vector<GradientPair>& gpair,
                 const RowSetCollection::Elem row_indices,
                 const GHistIndexMatrix& gmat,
                 GHistRowT hist);
  // construct a histogram via the subtraction trick
  void SubtractionTrick(GHistRowT self,
                        GHistRowT sibling,
                        GHistRowT parent);

  uint32_t GetNumBins() const {
    return nbins_;
  }

 private:
  /*! \brief number of threads for parallel computation */
  size_t nthread_ { 0 };
  /*! \brief number of all bins over all features */
  uint32_t nbins_ { 0 };
};

/*!
 * \brief A C-style array with in-stack allocation. As long as the array is smaller
 *        than MaxStackSize, it will be allocated on the stack; otherwise it falls
 *        back to heap allocation.
 */
template<typename T, size_t MaxStackSize>
class MemStackAllocator {
 public:
  explicit MemStackAllocator(size_t required_size): required_size_(required_size) {
  }

  T* Get() {
    if (!ptr_) {
      if (MaxStackSize >= required_size_) {
        ptr_ = stack_mem_;
      } else {
        ptr_ = reinterpret_cast<T*>(malloc(required_size_ * sizeof(T)));
        do_free_ = true;
      }
    }

    return ptr_;
  }

  ~MemStackAllocator() {
    if (do_free_) free(ptr_);
  }

 private:
  T* ptr_ = nullptr;
  bool do_free_ = false;
  size_t required_size_;
  T stack_mem_[MaxStackSize];
};
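
// Illustrative sketch (editor's addition): MemStackAllocator picks stack or heap storage
// at construction time, which avoids heap traffic for the common small case.  The names
// below are hypothetical and used only for exposition.
inline double SumWithScratchExample(std::vector<double> const& values) {
  // Up to 128 doubles live on the stack; larger requests fall back to malloc/free.
  MemStackAllocator<double, 128> scratch(values.size());
  double* buf = scratch.Get();
  double total = 0.0;
  for (size_t i = 0; i < values.size(); ++i) {
    buf[i] = values[i] * 2.0;  // pretend we need a temporary working copy
    total += buf[i];
  }
  return total;
}  // `scratch` frees its heap allocation (if any) on destruction
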
}  // namespace common
}  // namespace xgboost
#endif  // XGBOOST_COMMON_HIST_UTIL_H_