xgboost
quantile.h
Go to the documentation of this file.
1 
7 #ifndef XGBOOST_COMMON_QUANTILE_H_
8 #define XGBOOST_COMMON_QUANTILE_H_
9 
10 #include <dmlc/base.h>
11 #include <xgboost/logging.h>
12 #include <xgboost/data.h>
13 #include <cmath>
14 #include <vector>
15 #include <cstring>
16 #include <algorithm>
17 #include <iostream>
18 
19 #include "timer.h"
20 
21 namespace xgboost {
22 namespace common {
28 template<typename DType, typename RType>
29 struct WQSummary {
31  struct Entry {
33  RType rmin;
35  RType rmax;
37  RType wmin;
39  DType value;
40  // constructor
41  XGBOOST_DEVICE Entry() {} // NOLINT
42  // constructor
43  XGBOOST_DEVICE Entry(RType rmin, RType rmax, RType wmin, DType value)
44  : rmin(rmin), rmax(rmax), wmin(wmin), value(value) {}
49  inline void CheckValid(RType eps = 0) const {
50  CHECK(rmin >= 0 && rmax >= 0 && wmin >= 0) << "nonneg constraint";
51  CHECK(rmax- rmin - wmin > -eps) << "relation constraint: min/max";
52  }
54  XGBOOST_DEVICE inline RType RMinNext() const {
55  return rmin + wmin;
56  }
58  XGBOOST_DEVICE inline RType RMaxPrev() const {
59  return rmax - wmin;
60  }
61 
62  friend std::ostream& operator<<(std::ostream& os, Entry const& e) {
63  os << "rmin: " << e.rmin << ", "
64  << "rmax: " << e.rmax << ", "
65  << "wmin: " << e.wmin << ", "
66  << "value: " << e.value;
67  return os;
68  }
69  };
71  struct Queue {
72  // entry in the queue
73  struct QEntry {
74  // value of the instance
75  DType value;
76  // weight of instance
77  RType weight;
78  // default constructor
79  QEntry() = default;
80  // constructor
81  QEntry(DType value, RType weight)
82  : value(value), weight(weight) {}
83  // comparator on value
84  inline bool operator<(const QEntry &b) const {
85  return value < b.value;
86  }
87  };
88  // the input queue
89  std::vector<QEntry> queue;
90  // end of the queue
91  size_t qtail;
92  // push data to the queue
93  inline void Push(DType x, RType w) {
94  if (qtail == 0 || queue[qtail - 1].value != x) {
95  queue[qtail++] = QEntry(x, w);
96  } else {
97  queue[qtail - 1].weight += w;
98  }
99  }
100  inline void MakeSummary(WQSummary *out) {
101  std::sort(queue.begin(), queue.begin() + qtail);
102  out->size = 0;
103  // start update sketch
104  RType wsum = 0;
105  // construct data with unique weights
106  for (size_t i = 0; i < qtail;) {
107  size_t j = i + 1;
108  RType w = queue[i].weight;
109  while (j < qtail && queue[j].value == queue[i].value) {
110  w += queue[j].weight; ++j;
111  }
112  out->data[out->size++] = Entry(wsum, wsum + w, w, queue[i].value);
113  wsum += w; i = j;
114  }
115  }
116  };
120  size_t size;
121  // constructor
123  : data(data), size(size) {}
127  inline RType MaxError() const {
128  RType res = data[0].rmax - data[0].rmin - data[0].wmin;
129  for (size_t i = 1; i < size; ++i) {
130  res = std::max(data[i].RMaxPrev() - data[i - 1].RMinNext(), res);
131  res = std::max(data[i].rmax - data[i].rmin - data[i].wmin, res);
132  }
133  return res;
134  }
140  inline Entry Query(DType qvalue, size_t &istart) const { // NOLINT(*)
141  while (istart < size && qvalue > data[istart].value) {
142  ++istart;
143  }
144  if (istart == size) {
145  RType rmax = data[size - 1].rmax;
146  return Entry(rmax, rmax, 0.0f, qvalue);
147  }
148  if (qvalue == data[istart].value) {
149  return data[istart];
150  } else {
151  if (istart == 0) {
152  return Entry(0.0f, 0.0f, 0.0f, qvalue);
153  } else {
154  return Entry(data[istart - 1].RMinNext(),
155  data[istart].RMaxPrev(),
156  0.0f, qvalue);
157  }
158  }
159  }
161  inline RType MaxRank() const {
162  return data[size - 1].rmax;
163  }
168  inline void CopyFrom(const WQSummary &src) {
169  if (!src.data) {
170  CHECK_EQ(src.size, 0);
171  size = 0;
172  return;
173  }
174  if (!data) {
175  CHECK_EQ(this->size, 0);
176  CHECK_EQ(src.size, 0);
177  return;
178  }
179  size = src.size;
180  std::memcpy(data, src.data, sizeof(Entry) * size);
181  }
182  inline void MakeFromSorted(const Entry* entries, size_t n) {
183  size = 0;
184  for (size_t i = 0; i < n;) {
185  size_t j = i + 1;
186  // ignore repeated values
187  for (; j < n && entries[j].value == entries[i].value; ++j) {}
188  data[size++] = Entry(entries[i].rmin, entries[i].rmax, entries[i].wmin,
189  entries[i].value);
190  i = j;
191  }
192  }
199  inline void CheckValid(RType eps) const {
200  for (size_t i = 0; i < size; ++i) {
201  data[i].CheckValid(eps);
202  if (i != 0) {
203  CHECK(data[i].rmin >= data[i - 1].rmin + data[i - 1].wmin) << "rmin range constraint";
204  CHECK(data[i].rmax >= data[i - 1].rmax + data[i].wmin) << "rmax range constraint";
205  }
206  }
207  }
208 
215  void SetPrune(const WQSummary &src, size_t maxsize) {
216  if (src.size <= maxsize) {
217  this->CopyFrom(src); return;
218  }
219  const RType begin = src.data[0].rmax;
220  const RType range = src.data[src.size - 1].rmin - src.data[0].rmax;
221  const size_t n = maxsize - 1;
222  data[0] = src.data[0];
223  this->size = 1;
224  // lastidx is used to avoid duplicated records
225  size_t i = 1, lastidx = 0;
226  for (size_t k = 1; k < n; ++k) {
227  RType dx2 = 2 * ((k * range) / n + begin);
228  // find first i such that d < (rmax[i+1] + rmin[i+1]) / 2
229  while (i < src.size - 1
230  && dx2 >= src.data[i + 1].rmax + src.data[i + 1].rmin) ++i;
231  if (i == src.size - 1) break;
232  if (dx2 < src.data[i].RMinNext() + src.data[i + 1].RMaxPrev()) {
233  if (i != lastidx) {
234  data[size++] = src.data[i]; lastidx = i;
235  }
236  } else {
237  if (i + 1 != lastidx) {
238  data[size++] = src.data[i + 1]; lastidx = i + 1;
239  }
240  }
241  }
242  if (lastidx != src.size - 1) {
243  data[size++] = src.data[src.size - 1];
244  }
245  }
251  inline void SetCombine(const WQSummary &sa,
252  const WQSummary &sb) {
253  if (sa.size == 0) {
254  this->CopyFrom(sb); return;
255  }
256  if (sb.size == 0) {
257  this->CopyFrom(sa); return;
258  }
259  CHECK(sa.size > 0 && sb.size > 0);
260  const Entry *a = sa.data, *a_end = sa.data + sa.size;
261  const Entry *b = sb.data, *b_end = sb.data + sb.size;
262  // extended rmin value
263  RType aprev_rmin = 0, bprev_rmin = 0;
264  Entry *dst = this->data;
265  while (a != a_end && b != b_end) {
266  // duplicated value entry
267  if (a->value == b->value) {
268  *dst = Entry(a->rmin + b->rmin,
269  a->rmax + b->rmax,
270  a->wmin + b->wmin, a->value);
271  aprev_rmin = a->RMinNext();
272  bprev_rmin = b->RMinNext();
273  ++dst; ++a; ++b;
274  } else if (a->value < b->value) {
275  *dst = Entry(a->rmin + bprev_rmin,
276  a->rmax + b->RMaxPrev(),
277  a->wmin, a->value);
278  aprev_rmin = a->RMinNext();
279  ++dst; ++a;
280  } else {
281  *dst = Entry(b->rmin + aprev_rmin,
282  b->rmax + a->RMaxPrev(),
283  b->wmin, b->value);
284  bprev_rmin = b->RMinNext();
285  ++dst; ++b;
286  }
287  }
288  if (a != a_end) {
289  RType brmax = (b_end - 1)->rmax;
290  do {
291  *dst = Entry(a->rmin + bprev_rmin, a->rmax + brmax, a->wmin, a->value);
292  ++dst; ++a;
293  } while (a != a_end);
294  }
295  if (b != b_end) {
296  RType armax = (a_end - 1)->rmax;
297  do {
298  *dst = Entry(b->rmin + aprev_rmin, b->rmax + armax, b->wmin, b->value);
299  ++dst; ++b;
300  } while (b != b_end);
301  }
302  this->size = dst - data;
303  const RType tol = 10;
304  RType err_mingap, err_maxgap, err_wgap;
305  this->FixError(&err_mingap, &err_maxgap, &err_wgap);
306  if (err_mingap > tol || err_maxgap > tol || err_wgap > tol) {
307  LOG(INFO) << "mingap=" << err_mingap
308  << ", maxgap=" << err_maxgap
309  << ", wgap=" << err_wgap;
310  }
311  CHECK(size <= sa.size + sb.size) << "bug in combine";
312  }
313  // helper function to print the current content of sketch
314  inline void Print() const {
315  for (size_t i = 0; i < this->size; ++i) {
316  LOG(CONSOLE) << "[" << i << "] rmin=" << data[i].rmin
317  << ", rmax=" << data[i].rmax
318  << ", wmin=" << data[i].wmin
319  << ", v=" << data[i].value;
320  }
321  }
322  // try to fix rounding error
323  // and re-establish invariance
324  inline void FixError(RType *err_mingap,
325  RType *err_maxgap,
326  RType *err_wgap) const {
327  *err_mingap = 0;
328  *err_maxgap = 0;
329  *err_wgap = 0;
330  RType prev_rmin = 0, prev_rmax = 0;
331  for (size_t i = 0; i < this->size; ++i) {
332  if (data[i].rmin < prev_rmin) {
333  data[i].rmin = prev_rmin;
334  *err_mingap = std::max(*err_mingap, prev_rmin - data[i].rmin);
335  } else {
336  prev_rmin = data[i].rmin;
337  }
338  if (data[i].rmax < prev_rmax) {
339  data[i].rmax = prev_rmax;
340  *err_maxgap = std::max(*err_maxgap, prev_rmax - data[i].rmax);
341  }
342  RType rmin_next = data[i].RMinNext();
343  if (data[i].rmax < rmin_next) {
344  data[i].rmax = rmin_next;
345  *err_wgap = std::max(*err_wgap, data[i].rmax - rmin_next);
346  }
347  prev_rmax = data[i].rmax;
348  }
349  }
350  // check consistency of the summary
351  inline bool Check(const char *msg) const {
352  const float tol = 10.0f;
353  for (size_t i = 0; i < this->size; ++i) {
354  if (data[i].rmin + data[i].wmin > data[i].rmax + tol ||
355  data[i].rmin < -1e-6f || data[i].rmax < -1e-6f) {
356  LOG(INFO) << "---------- WQSummary::Check did not pass ----------";
357  this->Print();
358  return false;
359  }
360  }
361  return true;
362  }
363 };
364 
366 template<typename DType, typename RType>
367 struct WXQSummary : public WQSummary<DType, RType> {
368  // redefine entry type
370  // constructor
372  : WQSummary<DType, RType>(data, size) {}
373  // check if the block is large chunk
374  inline static bool CheckLarge(const Entry &e, RType chunk) {
375  return e.RMinNext() > e.RMaxPrev() + chunk;
376  }
377  // set prune
378  inline void SetPrune(const WQSummary<DType, RType> &src, size_t maxsize) {
379  if (src.size <= maxsize) {
380  this->CopyFrom(src); return;
381  }
382  RType begin = src.data[0].rmax;
383  // n is number of points exclude the min/max points
384  size_t n = maxsize - 2, nbig = 0;
385  // these is the range of data exclude the min/max point
386  RType range = src.data[src.size - 1].rmin - begin;
387  // prune off zero weights
388  if (range == 0.0f || maxsize <= 2) {
389  // special case, contain only two effective data pts
390  this->data[0] = src.data[0];
391  this->data[1] = src.data[src.size - 1];
392  this->size = 2;
393  return;
394  } else {
395  range = std::max(range, static_cast<RType>(1e-3f));
396  }
397  // Get a big enough chunk size, bigger than range / n
398  // (multiply by 2 is a safe factor)
399  const RType chunk = 2 * range / n;
400  // minimized range
401  RType mrange = 0;
402  {
403  // first scan, grab all the big chunk
404  // moving block index, exclude the two ends.
405  size_t bid = 0;
406  for (size_t i = 1; i < src.size - 1; ++i) {
407  // detect big chunk data point in the middle
408  // always save these data points.
409  if (CheckLarge(src.data[i], chunk)) {
410  if (bid != i - 1) {
411  // accumulate the range of the rest points
412  mrange += src.data[i].RMaxPrev() - src.data[bid].RMinNext();
413  }
414  bid = i; ++nbig;
415  }
416  }
417  if (bid != src.size - 2) {
418  mrange += src.data[src.size-1].RMaxPrev() - src.data[bid].RMinNext();
419  }
420  }
421  // assert: there cannot be more than n big data points
422  if (nbig >= n) {
423  // see what was the case
424  LOG(INFO) << " check quantile stats, nbig=" << nbig << ", n=" << n;
425  LOG(INFO) << " srcsize=" << src.size << ", maxsize=" << maxsize
426  << ", range=" << range << ", chunk=" << chunk;
427  src.Print();
428  CHECK(nbig < n) << "quantile: too many large chunk";
429  }
430  this->data[0] = src.data[0];
431  this->size = 1;
432  // The counter on the rest of points, to be selected equally from small chunks.
433  n = n - nbig;
434  // find the rest of point
435  size_t bid = 0, k = 1, lastidx = 0;
436  for (size_t end = 1; end < src.size; ++end) {
437  if (end == src.size - 1 || CheckLarge(src.data[end], chunk)) {
438  if (bid != end - 1) {
439  size_t i = bid;
440  RType maxdx2 = src.data[end].RMaxPrev() * 2;
441  for (; k < n; ++k) {
442  RType dx2 = 2 * ((k * mrange) / n + begin);
443  if (dx2 >= maxdx2) break;
444  while (i < end &&
445  dx2 >= src.data[i + 1].rmax + src.data[i + 1].rmin) ++i;
446  if (i == end) break;
447  if (dx2 < src.data[i].RMinNext() + src.data[i + 1].RMaxPrev()) {
448  if (i != lastidx) {
449  this->data[this->size++] = src.data[i]; lastidx = i;
450  }
451  } else {
452  if (i + 1 != lastidx) {
453  this->data[this->size++] = src.data[i + 1]; lastidx = i + 1;
454  }
455  }
456  }
457  }
458  if (lastidx != end) {
459  this->data[this->size++] = src.data[end];
460  lastidx = end;
461  }
462  bid = end;
463  // shift base by the gap
464  begin += src.data[bid].RMinNext() - src.data[bid].RMaxPrev();
465  }
466  }
467  }
468 };
476 template<typename DType, typename RType, class TSummary>
478  public:
479  static float constexpr kFactor = 8.0;
480 
481  public:
483  using Summary = TSummary;
485  using Entry = typename Summary::Entry;
487  struct SummaryContainer : public Summary {
488  std::vector<Entry> space;
489  SummaryContainer(const SummaryContainer &src) : Summary(nullptr, src.size) {
490  this->space = src.space;
491  this->data = dmlc::BeginPtr(this->space);
492  }
493  SummaryContainer() : Summary(nullptr, 0) {
494  }
496  inline void Reserve(size_t size) {
497  if (size > space.size()) {
498  space.resize(size);
499  this->data = dmlc::BeginPtr(space);
500  }
501  }
508  inline void Reduce(const Summary &src, size_t max_nbyte) {
509  this->Reserve((max_nbyte - sizeof(this->size)) / sizeof(Entry));
511  temp.Reserve(this->size + src.size);
512  temp.SetCombine(*this, src);
513  this->SetPrune(temp, space.size());
514  }
516  inline static size_t CalcMemCost(size_t nentry) {
517  return sizeof(size_t) + sizeof(Entry) * nentry;
518  }
520  template<typename TStream>
521  inline void Save(TStream &fo) const { // NOLINT(*)
522  fo.Write(&(this->size), sizeof(this->size));
523  if (this->size != 0) {
524  fo.Write(this->data, this->size * sizeof(Entry));
525  }
526  }
528  template<typename TStream>
529  inline void Load(TStream &fi) { // NOLINT(*)
530  CHECK_EQ(fi.Read(&this->size, sizeof(this->size)), sizeof(this->size));
531  this->Reserve(this->size);
532  if (this->size != 0) {
533  CHECK_EQ(fi.Read(this->data, this->size * sizeof(Entry)),
534  this->size * sizeof(Entry));
535  }
536  }
537  };
543  inline void Init(size_t maxn, double eps) {
544  LimitSizeLevel(maxn, eps, &nlevel, &limit_size);
545  // lazy reserve the space, if there is only one value, no need to allocate space
546  inqueue.queue.resize(1);
547  inqueue.qtail = 0;
548  data.clear();
549  level.clear();
550  }
551 
552  inline static void LimitSizeLevel
553  (size_t maxn, double eps, size_t* out_nlevel, size_t* out_limit_size) {
554  size_t& nlevel = *out_nlevel;
555  size_t& limit_size = *out_limit_size;
556  nlevel = 1;
557  while (true) {
558  limit_size = static_cast<size_t>(ceil(nlevel / eps)) + 1;
559  limit_size = std::min(maxn, limit_size);
560  size_t n = (1ULL << nlevel);
561  if (n * limit_size >= maxn) break;
562  ++nlevel;
563  }
564  // check invariant
565  size_t n = (1ULL << nlevel);
566  CHECK(n * limit_size >= maxn) << "invalid init parameter";
567  CHECK(nlevel <= std::max(static_cast<size_t>(1), static_cast<size_t>(limit_size * eps)))
568  << "invalid init parameter";
569  }
570 
576  inline void Push(DType x, RType w = 1) {
577  if (w == static_cast<RType>(0)) return;
578  if (inqueue.qtail == inqueue.queue.size()) {
579  // jump from lazy one value to limit_size * 2
580  if (inqueue.queue.size() == 1) {
581  inqueue.queue.resize(limit_size * 2);
582  } else {
583  temp.Reserve(limit_size * 2);
584  inqueue.MakeSummary(&temp);
585  // cleanup queue
586  inqueue.qtail = 0;
587  this->PushTemp();
588  }
589  }
590  inqueue.Push(x, w);
591  }
592 
593  inline void PushSummary(const Summary& summary) {
594  temp.Reserve(limit_size * 2);
595  temp.SetPrune(summary, limit_size * 2);
596  PushTemp();
597  }
598 
600  inline void PushTemp() {
601  temp.Reserve(limit_size * 2);
602  for (size_t l = 1; true; ++l) {
603  this->InitLevel(l + 1);
604  // check if level l is empty
605  if (level[l].size == 0) {
606  level[l].SetPrune(temp, limit_size);
607  break;
608  } else {
609  // level 0 is actually temp space
610  level[0].SetPrune(temp, limit_size);
611  temp.SetCombine(level[0], level[l]);
612  if (temp.size > limit_size) {
613  // try next level
614  level[l].size = 0;
615  } else {
616  // if merged record is still smaller, no need to send to next level
617  level[l].CopyFrom(temp); break;
618  }
619  }
620  }
621  }
623  inline void GetSummary(SummaryContainer *out) {
624  if (level.size() != 0) {
625  out->Reserve(limit_size * 2);
626  } else {
627  out->Reserve(inqueue.queue.size());
628  }
629  inqueue.MakeSummary(out);
630  if (level.size() != 0) {
631  level[0].SetPrune(*out, limit_size);
632  for (size_t l = 1; l < level.size(); ++l) {
633  if (level[l].size == 0) continue;
634  if (level[0].size == 0) {
635  level[0].CopyFrom(level[l]);
636  } else {
637  out->SetCombine(level[0], level[l]);
638  level[0].SetPrune(*out, limit_size);
639  }
640  }
641  out->CopyFrom(level[0]);
642  } else {
643  if (out->size > limit_size) {
645  temp.SetPrune(*out, limit_size);
646  out->CopyFrom(temp);
647  }
648  }
649  }
650  // used for debug, check if the sketch is valid
651  inline void CheckValid(RType eps) const {
652  for (size_t l = 1; l < level.size(); ++l) {
653  level[l].CheckValid(eps);
654  }
655  }
656  // initialize level space to at least nlevel
657  inline void InitLevel(size_t nlevel) {
658  if (level.size() >= nlevel) return;
659  data.resize(limit_size * nlevel);
660  level.resize(nlevel, Summary(nullptr, 0));
661  for (size_t l = 0; l < level.size(); ++l) {
662  level[l].data = dmlc::BeginPtr(data) + l * limit_size;
663  }
664  }
665  // input data queue
666  typename Summary::Queue inqueue;
667  // number of levels
668  size_t nlevel;
669  // size of summary in each level
670  size_t limit_size;
671  // the level of each summaries
672  std::vector<Summary> level;
673  // content of the summary
674  std::vector<Entry> data;
675  // temporal summary, used for temp-merge
676  SummaryContainer temp;
677 };
678 
684 template<typename DType, typename RType = unsigned>
686  public QuantileSketchTemplate<DType, RType, WQSummary<DType, RType> > {
687 };
688 
694 template<typename DType, typename RType = unsigned>
696  public QuantileSketchTemplate<DType, RType, WXQSummary<DType, RType> > {
697 };
698 
699 class HistogramCuts;
700 
705  public:
707 
708  private:
709  std::vector<WQSketch> sketches_;
710  std::vector<bst_row_t> columns_size_;
711  int32_t max_bins_;
712  bool use_group_ind_{false};
713  Monitor monitor_;
714 
715  public:
716  /* \brief Initialize necessary info.
717  *
718  * \param columns_size Size of each column.
719  * \param max_bins maximum number of bins for each feature.
720  * \param use_group whether is assigned to group to data instance.
721  */
722  HostSketchContainer(std::vector<bst_row_t> columns_size, int32_t max_bins,
723  bool use_group);
724 
725  static bool UseGroup(MetaInfo const &info) {
726  size_t const num_groups =
727  info.group_ptr_.size() == 0 ? 0 : info.group_ptr_.size() - 1;
728  // Use group index for weights?
729  bool const use_group_ind =
730  num_groups != 0 && (info.weights_.Size() != info.num_row_);
731  return use_group_ind;
732  }
733 
734  static std::vector<bst_row_t> CalcColumnSize(SparsePage const &page,
735  bst_feature_t const n_columns,
736  size_t const nthreads);
737 
738  static std::vector<bst_feature_t> LoadBalance(SparsePage const &page,
739  bst_feature_t n_columns,
740  size_t const nthreads);
741 
742  static uint32_t SearchGroupIndFromRow(std::vector<bst_uint> const &group_ptr,
743  size_t const base_rowid) {
744  CHECK_LT(base_rowid, group_ptr.back())
745  << "Row: " << base_rowid << " is not found in any group.";
746  bst_group_t group_ind =
747  std::upper_bound(group_ptr.cbegin(), group_ptr.cend() - 1, base_rowid) -
748  group_ptr.cbegin() - 1;
749  return group_ind;
750  }
751  // Gather sketches from all workers.
752  void GatherSketchInfo(std::vector<WQSketch::SummaryContainer> const &reduced,
753  std::vector<bst_row_t> *p_worker_segments,
754  std::vector<bst_row_t> *p_sketches_scan,
755  std::vector<WQSketch::Entry> *p_global_sketches);
756  // Merge sketches from all workers.
757  void AllReduce(std::vector<WQSketch::SummaryContainer> *p_reduced,
758  std::vector<int32_t>* p_num_cuts);
759 
760  /* \brief Push a CSR matrix. */
761  void PushRowPage(SparsePage const& page, MetaInfo const& info);
762 
763  void MakeCuts(HistogramCuts* cuts);
764 };
765 } // namespace common
766 } // namespace xgboost
767 #endif // XGBOOST_COMMON_QUANTILE_H_
xgboost::common::HostSketchContainer::MakeCuts
void MakeCuts(HistogramCuts *cuts)
xgboost::common::WQSummary::CopyFrom
void CopyFrom(const WQSummary &src)
copy content from src
Definition: quantile.h:168
xgboost::common::WQSummary::Queue::MakeSummary
void MakeSummary(WQSummary *out)
Definition: quantile.h:100
xgboost::common::HostSketchContainer::AllReduce
void AllReduce(std::vector< WQSketch::SummaryContainer > *p_reduced, std::vector< int32_t > *p_num_cuts)
xgboost::MetaInfo::num_row_
uint64_t num_row_
number of rows in the data
Definition: data.h:51
xgboost::common::WQSummary::Check
bool Check(const char *msg) const
Definition: quantile.h:351
xgboost::common::QuantileSketchTemplate::LimitSizeLevel
static void LimitSizeLevel(size_t maxn, double eps, size_t *out_nlevel, size_t *out_limit_size)
Definition: quantile.h:553
xgboost::common::WQSummary::FixError
void FixError(RType *err_mingap, RType *err_maxgap, RType *err_wgap) const
Definition: quantile.h:324
xgboost::common::QuantileSketchTemplate::SummaryContainer::Reserve
void Reserve(size_t size)
reserve space for summary
Definition: quantile.h:496
xgboost::common::QuantileSketchTemplate::CheckValid
void CheckValid(RType eps) const
Definition: quantile.h:651
xgboost::common::HostSketchContainer
Definition: quantile.h:704
xgboost::common::WQSummary::MakeFromSorted
void MakeFromSorted(const Entry *entries, size_t n)
Definition: quantile.h:182
xgboost::SparsePage
In-memory storage unit of sparse batch, stored in CSR format.
Definition: data.h:243
xgboost::common::WXQSummary::WXQSummary
WXQSummary(Entry *data, size_t size)
Definition: quantile.h:371
xgboost::common::QuantileSketchTemplate::SummaryContainer::Save
void Save(TStream &fo) const
save the data structure into stream
Definition: quantile.h:521
xgboost::common::WQSummary::Entry::RMaxPrev
XGBOOST_DEVICE RType RMaxPrev() const
Definition: quantile.h:58
xgboost::common::HistogramCuts
Definition: hist_util.h:37
xgboost::common::HostSketchContainer::SearchGroupIndFromRow
static uint32_t SearchGroupIndFromRow(std::vector< bst_uint > const &group_ptr, size_t const base_rowid)
Definition: quantile.h:742
xgboost::common::QuantileSketchTemplate::limit_size
size_t limit_size
Definition: quantile.h:670
xgboost::common::WQSummary::Queue::qtail
size_t qtail
Definition: quantile.h:91
xgboost::common::WQSummary::Entry::rmax
RType rmax
maximum rank
Definition: quantile.h:35
xgboost::common::QuantileSketchTemplate::level
std::vector< Summary > level
Definition: quantile.h:672
xgboost::common::WXQSummary
try to do efficient pruning
Definition: quantile.h:367
xgboost::common::QuantileSketchTemplate::SummaryContainer::CalcMemCost
static size_t CalcMemCost(size_t nentry)
return the number of bytes this data structure cost in serialization
Definition: quantile.h:516
xgboost::common::QuantileSketchTemplate::GetSummary
void GetSummary(SummaryContainer *out)
get the summary after finalize
Definition: quantile.h:623
xgboost::common::QuantileSketchTemplate::SummaryContainer::space
std::vector< Entry > space
Definition: quantile.h:488
xgboost::common::WQSummary::Entry::CheckValid
void CheckValid(RType eps=0) const
debug function, check Valid
Definition: quantile.h:49
xgboost::common::WQuantileSketch
Quantile sketch use WQSummary.
Definition: quantile.h:685
xgboost::common::WQSummary::Queue::Push
void Push(DType x, RType w)
Definition: quantile.h:93
xgboost::common::WQSummary::Print
void Print() const
Definition: quantile.h:314
xgboost::common::QuantileSketchTemplate::SummaryContainer
same as summary, but use STL to backup the space
Definition: quantile.h:487
xgboost::common::WQSummary::Entry::Entry
XGBOOST_DEVICE Entry(RType rmin, RType rmax, RType wmin, DType value)
Definition: quantile.h:43
xgboost::MetaInfo::group_ptr_
std::vector< bst_group_t > group_ptr_
the index of begin and end of a group needed when the learning task is ranking.
Definition: data.h:62
xgboost::common::HostSketchContainer::CalcColumnSize
static std::vector< bst_row_t > CalcColumnSize(SparsePage const &page, bst_feature_t const n_columns, size_t const nthreads)
xgboost::common::QuantileSketchTemplate::Push
void Push(DType x, RType w=1)
add an element to a sketch
Definition: quantile.h:576
xgboost::common::WQSummary::Queue::QEntry::operator<
bool operator<(const QEntry &b) const
Definition: quantile.h:84
xgboost::common::WQSummary::Entry::Entry
XGBOOST_DEVICE Entry()
Definition: quantile.h:41
xgboost::bst_feature_t
uint32_t bst_feature_t
Type for data column (feature) index.
Definition: base.h:123
xgboost::common::QuantileSketchTemplate::InitLevel
void InitLevel(size_t nlevel)
Definition: quantile.h:657
xgboost::common::WQSummary::Entry::operator<<
friend std::ostream & operator<<(std::ostream &os, Entry const &e)
Definition: quantile.h:62
xgboost::bst_group_t
uint32_t bst_group_t
Type for ranking group index.
Definition: base.h:134
xgboost::common::WQSummary::Entry::rmin
RType rmin
minimum rank
Definition: quantile.h:33
xgboost::common::WXQSummary::CheckLarge
static bool CheckLarge(const Entry &e, RType chunk)
Definition: quantile.h:374
timer.h
xgboost::common::WQSummary::Queue::QEntry::weight
RType weight
Definition: quantile.h:77
xgboost::common::WQSummary::MaxRank
RType MaxRank() const
Definition: quantile.h:161
xgboost::common::WQSummary::CheckValid
void CheckValid(RType eps) const
debug function, validate whether the summary run consistency check to check if it is a valid summary
Definition: quantile.h:199
xgboost::common::Monitor
Timing utility used to measure total method execution time over the lifetime of the containing object...
Definition: timer.h:47
xgboost::common::WQSummary::Entry
an entry in the sketch summary
Definition: quantile.h:31
xgboost::common::QuantileSketchTemplate::kFactor
static constexpr float kFactor
Definition: quantile.h:479
xgboost::common::WQSummary::Queue::QEntry::QEntry
QEntry(DType value, RType weight)
Definition: quantile.h:81
xgboost::common::HostSketchContainer::GatherSketchInfo
void GatherSketchInfo(std::vector< WQSketch::SummaryContainer > const &reduced, std::vector< bst_row_t > *p_worker_segments, std::vector< bst_row_t > *p_sketches_scan, std::vector< WQSketch::Entry > *p_global_sketches)
xgboost::common::HostSketchContainer::HostSketchContainer
HostSketchContainer(std::vector< bst_row_t > columns_size, int32_t max_bins, bool use_group)
xgboost::common::QuantileSketchTemplate::Summary
TSummary Summary
type of summary type
Definition: quantile.h:483
xgboost::common::QuantileSketchTemplate< DType, unsigned, WQSummary< DType, unsigned > >::Entry
typename Summary::Entry Entry
the entry type
Definition: quantile.h:485
xgboost::HostDeviceVector::Size
size_t Size() const
xgboost::common::WQSummary::SetCombine
void SetCombine(const WQSummary &sa, const WQSummary &sb)
set current summary to be merged summary of sa and sb
Definition: quantile.h:251
xgboost::common::QuantileSketchTemplate::SummaryContainer::SummaryContainer
SummaryContainer(const SummaryContainer &src)
Definition: quantile.h:489
xgboost::common::HostSketchContainer::UseGroup
static bool UseGroup(MetaInfo const &info)
Definition: quantile.h:725
xgboost::common::WQSummary::WQSummary
WQSummary(Entry *data, size_t size)
Definition: quantile.h:122
xgboost::common::WQSummary::Entry::value
DType value
the value of data
Definition: quantile.h:39
xgboost::MetaInfo::weights_
HostDeviceVector< bst_float > weights_
weights of each instance, optional
Definition: data.h:64
xgboost::common::QuantileSketchTemplate
template for all quantile sketch algorithm that uses merge/prune scheme
Definition: quantile.h:477
xgboost::common::HostSketchContainer::LoadBalance
static std::vector< bst_feature_t > LoadBalance(SparsePage const &page, bst_feature_t n_columns, size_t const nthreads)
xgboost::common::QuantileSketchTemplate::SummaryContainer::Reduce
void Reduce(const Summary &src, size_t max_nbyte)
do elementwise combination of summary array this[i] = combine(this[i], src[i]) for each i
Definition: quantile.h:508
xgboost::common::WQSummary::Queue
input data queue before entering the summary
Definition: quantile.h:71
xgboost::common::WXQSummary::SetPrune
void SetPrune(const WQSummary< DType, RType > &src, size_t maxsize)
Definition: quantile.h:378
xgboost::common::QuantileSketchTemplate::PushSummary
void PushSummary(const Summary &summary)
Definition: quantile.h:593
xgboost::common::WQSummary::Entry::wmin
RType wmin
maximum weight
Definition: quantile.h:37
data.h
The input data structure of xgboost.
xgboost::common::WQSummary::size
size_t size
number of elements in the summary
Definition: quantile.h:120
xgboost::common::QuantileSketchTemplate::temp
SummaryContainer temp
Definition: quantile.h:676
xgboost::common::WQSummary
experimental wsummary
Definition: quantile.h:29
xgboost::common::WQSummary::MaxError
RType MaxError() const
Definition: quantile.h:127
xgboost::common::HostSketchContainer::PushRowPage
void PushRowPage(SparsePage const &page, MetaInfo const &info)
xgboost::common::QuantileSketchTemplate::PushTemp
void PushTemp()
push up temp
Definition: quantile.h:600
xgboost::common::WQSummary::Query
Entry Query(DType qvalue, size_t &istart) const
query qvalue, start from istart
Definition: quantile.h:140
xgboost::common::QuantileSketchTemplate::data
std::vector< Entry > data
Definition: quantile.h:674
xgboost::common::WQSummary::data
Entry * data
data field
Definition: quantile.h:118
xgboost::common::WXQuantileSketch
Quantile sketch use WXQSummary.
Definition: quantile.h:695
xgboost::common::QuantileSketchTemplate::inqueue
Summary::Queue inqueue
Definition: quantile.h:666
xgboost::MetaInfo
Meta information about dataset, always sit in memory.
Definition: data.h:45
xgboost::common::WQSummary::Entry::RMinNext
XGBOOST_DEVICE RType RMinNext() const
Definition: quantile.h:54
xgboost::common::WQSummary::SetPrune
void SetPrune(const WQSummary &src, size_t maxsize)
set current summary to be pruned summary of src assume data field is already allocated to be at least...
Definition: quantile.h:215
xgboost::common::WXQSummary::Entry
typename WQSummary< DType, RType >::Entry Entry
Definition: quantile.h:369
xgboost::common::WQSummary::Queue::QEntry
Definition: quantile.h:73
xgboost::common::QuantileSketchTemplate::SummaryContainer::SummaryContainer
SummaryContainer()
Definition: quantile.h:493
xgboost::common::QuantileSketchTemplate::SummaryContainer::Load
void Load(TStream &fi)
load data structure from input stream
Definition: quantile.h:529
xgboost::common::WQSummary::Queue::QEntry::QEntry
QEntry()=default
XGBOOST_DEVICE
#define XGBOOST_DEVICE
Tag function as usable by device.
Definition: base.h:84
xgboost::common::WQSummary::Queue::QEntry::value
DType value
Definition: quantile.h:75
xgboost::common::QuantileSketchTemplate::nlevel
size_t nlevel
Definition: quantile.h:668
xgboost::common::QuantileSketchTemplate::Init
void Init(size_t maxn, double eps)
initialize the quantile sketch, given the performance specification
Definition: quantile.h:543
xgboost::common::WQSummary::Queue::queue
std::vector< QEntry > queue
Definition: quantile.h:89
xgboost
namespace of xgboost
Definition: base.h:110