hist_util.h
#ifndef XGBOOST_COMMON_HIST_UTIL_H_
#define XGBOOST_COMMON_HIST_UTIL_H_

#include <xgboost/data.h>
#include <xgboost/generic_parameters.h>
#include <limits>
#include <vector>
#include <algorithm>
#include <memory>
#include <utility>
#include <map>

#include "categorical.h"
#include "common.h"
#include "quantile.h"
#include "row_set.h"
#include "threading_utils.h"
#include "timer.h"

namespace xgboost {
class GHistIndexMatrix;

namespace common {

// A CSC matrix representing histogram cuts. The cut values represent upper
// bounds of bins containing approximately equal numbers of elements.
class HistogramCuts {
  bool has_categorical_{false};
  float max_cat_{-1.0f};

 protected:
  using BinIdx = uint32_t;

  void Swap(HistogramCuts&& that) noexcept(true) {
    std::swap(cut_values_, that.cut_values_);
    std::swap(cut_ptrs_, that.cut_ptrs_);
    std::swap(min_vals_, that.min_vals_);

    std::swap(has_categorical_, that.has_categorical_);
    std::swap(max_cat_, that.max_cat_);
  }

  void Copy(HistogramCuts const& that) {
    cut_values_.Resize(that.cut_values_.Size());
    cut_ptrs_.Resize(that.cut_ptrs_.Size());
    min_vals_.Resize(that.min_vals_.Size());
    cut_values_.Copy(that.cut_values_);
    cut_ptrs_.Copy(that.cut_ptrs_);
    min_vals_.Copy(that.min_vals_);
    has_categorical_ = that.has_categorical_;
    max_cat_ = that.max_cat_;
  }

 public:
  HostDeviceVector<float> cut_values_;
  HostDeviceVector<uint32_t> cut_ptrs_;
  // storing minimum value in a sketch set.
  HostDeviceVector<float> min_vals_;

  HistogramCuts();
  HistogramCuts(HistogramCuts const& that) { this->Copy(that); }

  HistogramCuts(HistogramCuts&& that) noexcept(true) {
    this->Swap(std::forward<HistogramCuts>(that));
  }

  HistogramCuts& operator=(HistogramCuts const& that) {
    this->Copy(that);
    return *this;
  }

  HistogramCuts& operator=(HistogramCuts&& that) noexcept(true) {
    this->Swap(std::forward<HistogramCuts>(that));
    return *this;
  }

  uint32_t FeatureBins(bst_feature_t feature) const {
    return cut_ptrs_.ConstHostVector().at(feature + 1) - cut_ptrs_.ConstHostVector()[feature];
  }

  std::vector<uint32_t> const& Ptrs() const { return cut_ptrs_.ConstHostVector(); }
  std::vector<float> const& Values() const { return cut_values_.ConstHostVector(); }
  std::vector<float> const& MinValues() const { return min_vals_.ConstHostVector(); }

  bool HasCategorical() const { return has_categorical_; }
  float MaxCategory() const { return max_cat_; }
  // Set meta info about categorical features.
  void SetCategorical(bool has_cat, float max_cat) {
    has_categorical_ = has_cat;
    max_cat_ = max_cat;
  }

  size_t TotalBins() const { return cut_ptrs_.ConstHostVector().back(); }

  // Return the index of a cut point that is strictly greater than the input
  // value, or the last available index if none exists.
  BinIdx SearchBin(float value, bst_feature_t column_id, std::vector<uint32_t> const& ptrs,
                   std::vector<float> const& values) const {
    auto end = ptrs[column_id + 1];
    auto beg = ptrs[column_id];
    auto it = std::upper_bound(values.cbegin() + beg, values.cbegin() + end, value);
    BinIdx idx = it - values.cbegin();
    idx -= !!(idx == end);
    return idx;
  }

  BinIdx SearchBin(float value, bst_feature_t column_id) const {
    return this->SearchBin(value, column_id, Ptrs(), Values());
  }

  // Search the bin index for a numerical feature.
  BinIdx SearchBin(Entry const& e) const {
    return SearchBin(e.fvalue, e.index);
  }

  // Search the bin index for a categorical feature.
  BinIdx SearchCatBin(Entry const& e) const {
    auto const& ptrs = this->Ptrs();
    auto const& vals = this->Values();
    auto end = ptrs.at(e.index + 1) + vals.cbegin();
    auto beg = ptrs[e.index] + vals.cbegin();
    // Truncates the value in case it's not perfectly rounded.
    auto v = static_cast<float>(common::AsCat(e.fvalue));
    auto bin_idx = std::lower_bound(beg, end, v) - vals.cbegin();
    if (bin_idx == ptrs.at(e.index + 1)) {
      bin_idx -= 1;
    }
    return bin_idx;
  }
};
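// Illustrative usage sketch (not part of the original header): querying a
// HistogramCuts object built elsewhere, e.g. by SketchOnDMatrix below. The
// helper name ExampleCutsLookup is hypothetical.
inline void ExampleCutsLookup(HistogramCuts const& cuts) {
  uint32_t bins_f0 = cuts.FeatureBins(0);  // bins owned by feature 0
  size_t total = cuts.TotalBins();         // bins over all features
  // SearchBin returns a global bin index: the per-feature offset stored in
  // cut_ptrs_ is already reflected in the result.
  auto bin = cuts.SearchBin(0.5f, /*column_id=*/0);
  (void)bins_f0; (void)total; (void)bin;
}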
// Run CPU sketching on DMatrix.
inline HistogramCuts SketchOnDMatrix(DMatrix* m, int32_t max_bins, int32_t n_threads,
                                     bool use_sorted = false, Span<float> const hessian = {}) {
  HistogramCuts out;
  auto const& info = m->Info();
  std::vector<std::vector<bst_row_t>> column_sizes(n_threads);
  for (auto& column : column_sizes) {
    column.resize(info.num_col_, 0);
  }
  std::vector<bst_row_t> reduced(info.num_col_, 0);
  for (auto const& page : m->GetBatches<SparsePage>()) {
    auto const& entries_per_column =
        HostSketchContainer::CalcColumnSize(page, info.num_col_, n_threads);
    for (size_t i = 0; i < entries_per_column.size(); ++i) {
      reduced[i] += entries_per_column[i];
    }
  }

  if (!use_sorted) {
    HostSketchContainer container(max_bins, m->Info(), reduced, HostSketchContainer::UseGroup(info),
                                  hessian, n_threads);
    for (auto const& page : m->GetBatches<SparsePage>()) {
      container.PushRowPage(page, info, hessian);
    }
    container.MakeCuts(&out);
  } else {
    SortedSketchContainer container{
        max_bins, m->Info(), reduced, HostSketchContainer::UseGroup(info), hessian, n_threads};
    for (auto const& page : m->GetBatches<SortedCSCPage>()) {
      container.PushColPage(page, info, hessian);
    }
    container.MakeCuts(&out);
  }

  return out;
}
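// Illustrative usage sketch (not part of the original header): running the
// CPU sketcher on an existing DMatrix and mapping one raw entry to its bin.
// The helper name ExampleSketch and the bin/thread counts are arbitrary.
inline void ExampleSketch(DMatrix* dmat) {
  HistogramCuts cuts = SketchOnDMatrix(dmat, /*max_bins=*/256, /*n_threads=*/4);
  Entry e{/*index=*/0, /*fvalue=*/3.14f};
  auto bin = cuts.SearchBin(e);  // global bin index for feature 0 at value 3.14
  (void)bin;
}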
enum BinTypeSize : uint32_t {
  kUint8BinsTypeSize = 1,
  kUint16BinsTypeSize = 2,
  kUint32BinsTypeSize = 4
};

// Optionally compressed gradient index. The compression works only with dense data.
struct Index {
  Index() { SetBinTypeSize(binTypeSize_); }
  Index(const Index& i) = delete;
  Index& operator=(Index i) = delete;
  Index(Index&& i) = delete;
  Index& operator=(Index&& i) = delete;
  uint32_t operator[](size_t i) const {
    if (!bin_offset_.empty()) {
      // dense, compressed
      auto fidx = i % bin_offset_.size();
      // restore the index by adding back its feature offset.
      return func_(data_.data(), i) + bin_offset_[fidx];
    } else {
      return func_(data_.data(), i);
    }
  }
  void SetBinTypeSize(BinTypeSize binTypeSize) {
    binTypeSize_ = binTypeSize;
    switch (binTypeSize) {
      case kUint8BinsTypeSize:
        func_ = &GetValueFromUint8;
        break;
      case kUint16BinsTypeSize:
        func_ = &GetValueFromUint16;
        break;
      case kUint32BinsTypeSize:
        func_ = &GetValueFromUint32;
        break;
      default:
        CHECK(binTypeSize == kUint8BinsTypeSize || binTypeSize == kUint16BinsTypeSize ||
              binTypeSize == kUint32BinsTypeSize);
    }
  }
  BinTypeSize GetBinTypeSize() const {
    return binTypeSize_;
  }
  template <typename T>
  T const* data() const {  // NOLINT
    return reinterpret_cast<T const*>(data_.data());
  }
  template <typename T>
  T* data() {  // NOLINT
    return reinterpret_cast<T*>(data_.data());
  }
  uint32_t const* Offset() const { return bin_offset_.data(); }
  size_t OffsetSize() const { return bin_offset_.size(); }
  size_t Size() const { return data_.size() / (binTypeSize_); }

  void Resize(const size_t n_bytes) {
    data_.resize(n_bytes);
  }
  // set the offset used in compression; cut_ptrs is the CSC indptr in HistogramCuts
  void SetBinOffset(std::vector<uint32_t> const& cut_ptrs) {
    bin_offset_.resize(cut_ptrs.size() - 1);  // resize to the number of features.
    std::copy_n(cut_ptrs.begin(), bin_offset_.size(), bin_offset_.begin());
  }
  std::vector<uint8_t>::const_iterator begin() const {  // NOLINT
    return data_.begin();
  }
  std::vector<uint8_t>::const_iterator end() const {  // NOLINT
    return data_.end();
  }

  std::vector<uint8_t>::iterator begin() {  // NOLINT
    return data_.begin();
  }
  std::vector<uint8_t>::iterator end() {  // NOLINT
    return data_.end();
  }

 private:
  // Functions to decompress the index.
  static uint32_t GetValueFromUint8(uint8_t const* t, size_t i) { return t[i]; }
  static uint32_t GetValueFromUint16(uint8_t const* t, size_t i) {
    return reinterpret_cast<uint16_t const*>(t)[i];
  }
  static uint32_t GetValueFromUint32(uint8_t const* t, size_t i) {
    return reinterpret_cast<uint32_t const*>(t)[i];
  }

  using Func = uint32_t (*)(uint8_t const*, size_t);

  std::vector<uint8_t> data_;
  // starting position of each feature inside the cut values (the indptr of the CSC cut matrix
  // HistogramCuts without the last entry). Used for bin compression.
  std::vector<uint32_t> bin_offset_;

  BinTypeSize binTypeSize_ {kUint8BinsTypeSize};
  Func func_;
};
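// Illustrative usage sketch (not part of the original header): filling a dense,
// uint8-compressed gradient index for n_features columns. cut_ptrs is the CSC
// indptr from HistogramCuts::Ptrs(); the helper name is hypothetical.
inline void ExampleCompressedIndex(std::vector<uint32_t> const& cut_ptrs, size_t n_rows) {
  size_t n_features = cut_ptrs.size() - 1;
  Index index;
  index.SetBinTypeSize(kUint8BinsTypeSize);  // 1 byte per stored value
  index.SetBinOffset(cut_ptrs);              // enables offset compression
  index.Resize(n_rows * n_features);         // bytes = entries * 1
  uint8_t* raw = index.data<uint8_t>();
  raw[0] = 2;  // row 0, feature 0: bin 2, local to that feature
  // operator[] adds the feature offset back, restoring the global bin index.
  uint32_t global_bin = index[0];            // == 2 + cut_ptrs[0]
  (void)global_bin;
}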
template <typename GradientIndex>
int32_t XGBOOST_HOST_DEV_INLINE BinarySearchBin(size_t begin, size_t end,
                                                GradientIndex const& data,
                                                uint32_t const fidx_begin,
                                                uint32_t const fidx_end) {
  size_t previous_middle = std::numeric_limits<size_t>::max();
  while (end != begin) {
    size_t middle = begin + (end - begin) / 2;
    if (middle == previous_middle) {
      break;
    }
    previous_middle = middle;

    // index into all the bins
    auto gidx = data[middle];

    if (gidx >= fidx_begin && gidx < fidx_end) {
      // Found the intersection.
      return static_cast<int32_t>(gidx);
    } else if (gidx < fidx_begin) {
      begin = middle;
    } else {
      end = middle;
    }
  }
  // Value is missing.
  return -1;
}
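// Illustrative usage sketch (not part of the original header): locating the bin
// of a single feature inside one row of a dense gradient index. `row` holds the
// sorted global bin indices of that row; [fidx_begin, fidx_end) is the bin
// range owned by the feature (taken from HistogramCuts::Ptrs()).
inline int32_t ExampleRowBinLookup(std::vector<uint32_t> const& row,
                                   uint32_t fidx_begin, uint32_t fidx_end) {
  // Returns the matching global bin index, or -1 if the feature is missing.
  return BinarySearchBin(0, row.size(), row, fidx_begin, fidx_end);
}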
class ColumnMatrix;

template<typename GradientSumT>
using GHistRow = Span<xgboost::detail::GradientPairInternal<GradientSumT> >;

// Fill a histogram with zeros in range [begin, end).
template<typename GradientSumT>
void InitilizeHistByZeroes(GHistRow<GradientSumT> hist, size_t begin, size_t end);

// Copy hist from src to dst in range [begin, end).
template<typename GradientSumT>
void CopyHist(GHistRow<GradientSumT> dst, const GHistRow<GradientSumT> src,
              size_t begin, size_t end);

// Increment hist as dst += add in range [begin, end).
template<typename GradientSumT>
void IncrementHist(GHistRow<GradientSumT> dst, const GHistRow<GradientSumT> add,
                   size_t begin, size_t end);

// Compute subtraction: dst = src1 - src2 in range [begin, end).
template<typename GradientSumT>
void SubtractionHist(GHistRow<GradientSumT> dst, const GHistRow<GradientSumT> src1,
                     const GHistRow<GradientSumT> src2,
                     size_t begin, size_t end);
// Histogram of gradient statistics for multiple nodes.
template<typename GradientSumT>
class HistCollection {
 public:
  using GradientPairT = xgboost::detail::GradientPairInternal<GradientSumT>;
  using GHistRowT = GHistRow<GradientSumT>;

  // access the histogram for the i-th node
  GHistRowT operator[](bst_uint nid) const {
    constexpr uint32_t kMax = std::numeric_limits<uint32_t>::max();
    const size_t id = row_ptr_.at(nid);
    CHECK_NE(id, kMax);
    GradientPairT* ptr = nullptr;
    if (contiguous_allocation_) {
      ptr = const_cast<GradientPairT*>(data_[0].data() + nbins_*id);
    } else {
      ptr = const_cast<GradientPairT*>(data_[id].data());
    }
    return {ptr, nbins_};
  }

  // have we computed a histogram for the i-th node?
  bool RowExists(bst_uint nid) const {
    const uint32_t k_max = std::numeric_limits<uint32_t>::max();
    return (nid < row_ptr_.size() && row_ptr_[nid] != k_max);
  }

  // initialize the histogram collection
  void Init(uint32_t nbins) {
    if (nbins_ != nbins) {
      nbins_ = nbins;
      // quite expensive operation, so let's do this only once
      data_.clear();
    }
    row_ptr_.clear();
    n_nodes_added_ = 0;
  }

  // create an empty histogram for the i-th node
  void AddHistRow(bst_uint nid) {
    constexpr uint32_t kMax = std::numeric_limits<uint32_t>::max();
    if (nid >= row_ptr_.size()) {
      row_ptr_.resize(nid + 1, kMax);
    }
    CHECK_EQ(row_ptr_[nid], kMax);

    if (data_.size() < (nid + 1)) {
      data_.resize((nid + 1));
    }

    row_ptr_[nid] = n_nodes_added_;
    n_nodes_added_++;
  }
  // allocate thread-local memory for the i-th node
  void AllocateData(bst_uint nid) {
    if (data_[row_ptr_[nid]].size() == 0) {
      data_[row_ptr_[nid]].resize(nbins_, {0, 0});
    }
  }
  // allocate one common buffer contiguously for all nodes; needed for a single Allreduce call
  void AllocateAllData() {
    const size_t new_size = nbins_*data_.size();
    contiguous_allocation_ = true;
    if (data_[0].size() != new_size) {
      data_[0].resize(new_size);
    }
  }

 private:
  // number of bins over all features
  uint32_t nbins_ = 0;
  // number of nodes added so far
  uint32_t n_nodes_added_ = 0;
  // whether all histograms share a single contiguous buffer (data_[0])
  bool contiguous_allocation_ = false;

  std::vector<std::vector<GradientPairT>> data_;

  // row_ptr_[nid] locates the histogram of node nid inside data_
  std::vector<size_t> row_ptr_;
};
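// Illustrative usage sketch (not part of the original header): a per-node
// histogram store with double precision sums; node ids and bin count are arbitrary.
inline void ExampleHistCollection() {
  HistCollection<double> hists;
  hists.Init(/*nbins=*/16);
  hists.AddHistRow(/*nid=*/0);
  hists.AddHistRow(/*nid=*/1);
  hists.AllocateData(0);
  hists.AllocateData(1);
  // Each row is a Span over 16 GradientPairInternal<double> entries.
  auto node0 = hists[0];
  (void)node0;
}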
// Stores temporary histograms so that they can be computed in parallel.
// Supports processing multiple tree nodes within the same parallel loop.
template<typename GradientSumT>
class ParallelGHistBuilder {
 public:
  using GHistRowT = GHistRow<GradientSumT>;

  void Init(size_t nbins) {
    if (nbins != nbins_) {
      hist_buffer_.Init(nbins);
      nbins_ = nbins;
    }
  }

  // Add new elements if needed and mark all hists as unused.
  // targeted_hists - already allocated hists which should contain the final results after the
  // ReduceHist() calls.
  void Reset(size_t nthreads, size_t nodes, const BlockedSpace2d& space,
             const std::vector<GHistRowT>& targeted_hists) {
    hist_buffer_.Init(nbins_);
    tid_nid_to_hist_.clear();
    threads_to_nids_map_.clear();

    targeted_hists_ = targeted_hists;

    CHECK_EQ(nodes, targeted_hists.size());

    nodes_ = nodes;
    nthreads_ = nthreads;

    MatchThreadsToNodes(space);
    AllocateAdditionalHistograms();
    MatchNodeNidPairToHist();

    hist_was_used_.resize(nthreads * nodes_);
    std::fill(hist_was_used_.begin(), hist_was_used_.end(), static_cast<int>(false));
  }

  // Get the specified hist; initialize it with zeros if it wasn't used before.
  GHistRowT GetInitializedHist(size_t tid, size_t nid) {
    CHECK_LT(nid, nodes_);
    CHECK_LT(tid, nthreads_);

    int idx = tid_nid_to_hist_.at({tid, nid});
    if (idx >= 0) {
      hist_buffer_.AllocateData(idx);
    }
    GHistRowT hist = idx == -1 ? targeted_hists_[nid] : hist_buffer_[idx];

    if (!hist_was_used_[tid * nodes_ + nid]) {
      InitilizeHistByZeroes(hist, 0, hist.size());
      hist_was_used_[tid * nodes_ + nid] = static_cast<int>(true);
    }

    return hist;
  }

  // Reduce the bins [begin, end) for the nid-node into dst across threads.
  void ReduceHist(size_t nid, size_t begin, size_t end) const {
    CHECK_GT(end, begin);
    CHECK_LT(nid, nodes_);

    GHistRowT dst = targeted_hists_[nid];

    bool is_updated = false;
    for (size_t tid = 0; tid < nthreads_; ++tid) {
      if (hist_was_used_[tid * nodes_ + nid]) {
        is_updated = true;

        int idx = tid_nid_to_hist_.at({tid, nid});
        GHistRowT src = idx == -1 ? targeted_hists_[nid] : hist_buffer_[idx];

        if (dst.data() != src.data()) {
          IncrementHist(dst, src, begin, end);
        }
      }
    }
    if (!is_updated) {
      // In distributed mode some tree nodes can be empty on local machines,
      // so we just set the local hist to zeros in this case.
      InitilizeHistByZeroes(dst, begin, end);
    }
  }

  void MatchThreadsToNodes(const BlockedSpace2d& space) {
    const size_t space_size = space.Size();
    const size_t chunck_size = space_size / nthreads_ + !!(space_size % nthreads_);

    threads_to_nids_map_.resize(nthreads_ * nodes_, false);

    for (size_t tid = 0; tid < nthreads_; ++tid) {
      size_t begin = chunck_size * tid;
      size_t end = std::min(begin + chunck_size, space_size);

      if (begin < space_size) {
        size_t nid_begin = space.GetFirstDimension(begin);
        size_t nid_end = space.GetFirstDimension(end-1);

        for (size_t nid = nid_begin; nid <= nid_end; ++nid) {
          // true means thread 'tid' will compute a partial hist for node 'nid'
          threads_to_nids_map_[tid * nodes_ + nid] = true;
        }
      }
    }
  }

  void AllocateAdditionalHistograms() {
    size_t hist_allocated_additionally = 0;

    for (size_t nid = 0; nid < nodes_; ++nid) {
      int nthreads_for_nid = 0;

      for (size_t tid = 0; tid < nthreads_; ++tid) {
        if (threads_to_nids_map_[tid * nodes_ + nid]) {
          nthreads_for_nid++;
        }
      }

      // In distributed mode some tree nodes can be empty on local machines,
      // in which case nthreads_for_nid stays 0.
      // Otherwise allocate (nthreads_for_nid - 1) additional histograms,
      // because one is already allocated externally (it will store the final result for the node).
      hist_allocated_additionally += std::max<int>(0, nthreads_for_nid - 1);
    }

    for (size_t i = 0; i < hist_allocated_additionally; ++i) {
      hist_buffer_.AddHistRow(i);
    }
  }

 private:
  void MatchNodeNidPairToHist() {
    size_t hist_allocated_additionally = 0;

    for (size_t nid = 0; nid < nodes_; ++nid) {
      bool first_hist = true;
      for (size_t tid = 0; tid < nthreads_; ++tid) {
        if (threads_to_nids_map_[tid * nodes_ + nid]) {
          if (first_hist) {
            tid_nid_to_hist_[{tid, nid}] = -1;
            first_hist = false;
          } else {
            tid_nid_to_hist_[{tid, nid}] = hist_allocated_additionally++;
          }
        }
      }
    }
  }

  // number of bins in each histogram
  size_t nbins_ = 0;
  // number of threads participating in the parallel computation
  size_t nthreads_ = 0;
  // number of nodes processed in parallel
  size_t nodes_ = 0;
  // buffer of additional, thread-local histograms
  HistCollection<GradientSumT> hist_buffer_;
  // marks which hists were used and therefore need to be merged;
  // int is used instead of bool because std::vector<bool> is not thread safe
  std::vector<int> hist_was_used_;

  // threads_to_nids_map_[tid * nodes_ + nid] is true if thread tid works on node nid
  std::vector<bool> threads_to_nids_map_;
  // histograms that hold the final result for each node
  std::vector<GHistRowT> targeted_hists_;
  // maps the pair {tid, nid} to an index into hist_buffer_; -1 refers to targeted_hists_[nid]
  std::map<std::pair<size_t, size_t>, int> tid_nid_to_hist_;
};
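// Illustrative usage sketch (not part of the original header): the typical flow
// when several threads accumulate partial histograms for several nodes. `space`
// partitions (node, row-block) work, `targets` holds one externally allocated
// output histogram per node; the helper name ExampleParallelHistFlow is hypothetical.
template <typename GradientSumT>
inline void ExampleParallelHistFlow(ParallelGHistBuilder<GradientSumT>* buf,
                                    BlockedSpace2d const& space, size_t n_threads,
                                    std::vector<GHistRow<GradientSumT>> const& targets,
                                    size_t n_bins) {
  buf->Init(n_bins);
  buf->Reset(n_threads, targets.size(), space, targets);
  // Inside the parallel loop each thread requests a zero-initialized slot:
  //   GHistRow<GradientSumT> hist = buf->GetInitializedHist(tid, nid);
  //   ... accumulate gradient pairs into hist ...
  // After the loop the partial results are folded back per node:
  //   buf->ReduceHist(nid, 0, n_bins);
}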
// Builder for histograms of gradient statistics.
template<typename GradientSumT>
class GHistBuilder {
 public:
  using GHistRowT = GHistRow<GradientSumT>;

  GHistBuilder() = default;
  explicit GHistBuilder(uint32_t nbins): nbins_{nbins} {}

  // construct a histogram via histogram aggregation
  template <bool any_missing>
  void BuildHist(const std::vector<GradientPair>& gpair,
                 const RowSetCollection::Elem row_indices,
                 const GHistIndexMatrix& gmat, GHistRowT hist) const;
  uint32_t GetNumBins() const {
    return nbins_;
  }

 private:
  // number of all bins over all features
  uint32_t nbins_ { 0 };
};
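// Illustrative usage sketch (not part of the original header): building the
// histogram of a single node from its row set. All arguments are assumed to
// come from the quantile-hist training pipeline, and <false> means the
// gradient index has no missing values; the helper name is hypothetical.
inline void ExampleBuildHist(std::vector<GradientPair> const& gpair,
                             RowSetCollection::Elem row_indices,
                             GHistIndexMatrix const& gmat,
                             GHistRow<double> hist) {
  GHistBuilder<double> builder(static_cast<uint32_t>(hist.size()));
  builder.BuildHist<false>(gpair, row_indices, gmat, hist);
}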
}  // namespace common
}  // namespace xgboost
#endif  // XGBOOST_COMMON_HIST_UTIL_H_