xgboost
quantile.h
Go to the documentation of this file.
1 
7 #ifndef XGBOOST_COMMON_QUANTILE_H_
8 #define XGBOOST_COMMON_QUANTILE_H_
9 
10 #include <dmlc/base.h>
11 #include <xgboost/logging.h>
12 #include <cmath>
13 #include <vector>
14 #include <cstring>
15 #include <algorithm>
16 #include <iostream>
17 
18 namespace xgboost {
19 namespace common {
25 template<typename DType, typename RType>
26 struct WQSummary {
28  struct Entry {
30  RType rmin;
32  RType rmax;
34  RType wmin;
36  DType value;
37  // constructor
38  XGBOOST_DEVICE Entry() {} // NOLINT
39  // constructor
40  XGBOOST_DEVICE Entry(RType rmin, RType rmax, RType wmin, DType value)
41  : rmin(rmin), rmax(rmax), wmin(wmin), value(value) {}
46  inline void CheckValid(RType eps = 0) const {
47  CHECK(rmin >= 0 && rmax >= 0 && wmin >= 0) << "nonneg constraint";
48  CHECK(rmax- rmin - wmin > -eps) << "relation constraint: min/max";
49  }
51  XGBOOST_DEVICE inline RType RMinNext() const {
52  return rmin + wmin;
53  }
55  XGBOOST_DEVICE inline RType RMaxPrev() const {
56  return rmax - wmin;
57  }
58 
59  friend std::ostream& operator<<(std::ostream& os, Entry const& e) {
60  os << "rmin: " << e.rmin << ", "
61  << "rmax: " << e.rmax << ", "
62  << "wmin: " << e.wmin << ", "
63  << "value: " << e.value;
64  return os;
65  }
66  };
68  struct Queue {
69  // entry in the queue
70  struct QEntry {
71  // value of the instance
72  DType value;
73  // weight of instance
74  RType weight;
75  // default constructor
76  QEntry() = default;
77  // constructor
78  QEntry(DType value, RType weight)
79  : value(value), weight(weight) {}
80  // comparator on value
81  inline bool operator<(const QEntry &b) const {
82  return value < b.value;
83  }
84  };
85  // the input queue
86  std::vector<QEntry> queue;
87  // end of the queue
88  size_t qtail;
89  // push data to the queue
90  inline void Push(DType x, RType w) {
91  if (qtail == 0 || queue[qtail - 1].value != x) {
92  queue[qtail++] = QEntry(x, w);
93  } else {
94  queue[qtail - 1].weight += w;
95  }
96  }
97  inline void MakeSummary(WQSummary *out) {
98  std::sort(queue.begin(), queue.begin() + qtail);
99  out->size = 0;
100  // start update sketch
101  RType wsum = 0;
102  // construct data with unique weights
103  for (size_t i = 0; i < qtail;) {
104  size_t j = i + 1;
105  RType w = queue[i].weight;
106  while (j < qtail && queue[j].value == queue[i].value) {
107  w += queue[j].weight; ++j;
108  }
109  out->data[out->size++] = Entry(wsum, wsum + w, w, queue[i].value);
110  wsum += w; i = j;
111  }
112  }
113  };
117  size_t size;
118  // constructor
119  WQSummary(Entry *data, size_t size)
120  : data(data), size(size) {}
124  inline RType MaxError() const {
125  RType res = data[0].rmax - data[0].rmin - data[0].wmin;
126  for (size_t i = 1; i < size; ++i) {
127  res = std::max(data[i].RMaxPrev() - data[i - 1].RMinNext(), res);
128  res = std::max(data[i].rmax - data[i].rmin - data[i].wmin, res);
129  }
130  return res;
131  }
137  inline Entry Query(DType qvalue, size_t &istart) const { // NOLINT(*)
138  while (istart < size && qvalue > data[istart].value) {
139  ++istart;
140  }
141  if (istart == size) {
142  RType rmax = data[size - 1].rmax;
143  return Entry(rmax, rmax, 0.0f, qvalue);
144  }
145  if (qvalue == data[istart].value) {
146  return data[istart];
147  } else {
148  if (istart == 0) {
149  return Entry(0.0f, 0.0f, 0.0f, qvalue);
150  } else {
151  return Entry(data[istart - 1].RMinNext(),
152  data[istart].RMaxPrev(),
153  0.0f, qvalue);
154  }
155  }
156  }
158  inline RType MaxRank() const {
159  return data[size - 1].rmax;
160  }
165  inline void CopyFrom(const WQSummary &src) {
166  size = src.size;
167  std::memcpy(data, src.data, sizeof(Entry) * size);
168  }
169  inline void MakeFromSorted(const Entry* entries, size_t n) {
170  size = 0;
171  for (size_t i = 0; i < n;) {
172  size_t j = i + 1;
173  // ignore repeated values
174  for (; j < n && entries[j].value == entries[i].value; ++j) {}
175  data[size++] = Entry(entries[i].rmin, entries[i].rmax, entries[i].wmin,
176  entries[i].value);
177  i = j;
178  }
179  }
186  inline void CheckValid(RType eps) const {
187  for (size_t i = 0; i < size; ++i) {
188  data[i].CheckValid(eps);
189  if (i != 0) {
190  CHECK(data[i].rmin >= data[i - 1].rmin + data[i - 1].wmin) << "rmin range constraint";
191  CHECK(data[i].rmax >= data[i - 1].rmax + data[i].wmin) << "rmax range constraint";
192  }
193  }
194  }
195 
202  void SetPrune(const WQSummary &src, size_t maxsize) {
203  if (src.size <= maxsize) {
204  this->CopyFrom(src); return;
205  }
206  const RType begin = src.data[0].rmax;
207  const RType range = src.data[src.size - 1].rmin - src.data[0].rmax;
208  const size_t n = maxsize - 1;
209  data[0] = src.data[0];
210  this->size = 1;
211  // lastidx is used to avoid duplicated records
212  size_t i = 1, lastidx = 0;
213  for (size_t k = 1; k < n; ++k) {
214  RType dx2 = 2 * ((k * range) / n + begin);
215  // find first i such that d < (rmax[i+1] + rmin[i+1]) / 2
216  while (i < src.size - 1
217  && dx2 >= src.data[i + 1].rmax + src.data[i + 1].rmin) ++i;
218  if (i == src.size - 1) break;
219  if (dx2 < src.data[i].RMinNext() + src.data[i + 1].RMaxPrev()) {
220  if (i != lastidx) {
221  data[size++] = src.data[i]; lastidx = i;
222  }
223  } else {
224  if (i + 1 != lastidx) {
225  data[size++] = src.data[i + 1]; lastidx = i + 1;
226  }
227  }
228  }
229  if (lastidx != src.size - 1) {
230  data[size++] = src.data[src.size - 1];
231  }
232  }
238  inline void SetCombine(const WQSummary &sa,
239  const WQSummary &sb) {
240  if (sa.size == 0) {
241  this->CopyFrom(sb); return;
242  }
243  if (sb.size == 0) {
244  this->CopyFrom(sa); return;
245  }
246  CHECK(sa.size > 0 && sb.size > 0);
247  const Entry *a = sa.data, *a_end = sa.data + sa.size;
248  const Entry *b = sb.data, *b_end = sb.data + sb.size;
249  // extended rmin value
250  RType aprev_rmin = 0, bprev_rmin = 0;
251  Entry *dst = this->data;
252  while (a != a_end && b != b_end) {
253  // duplicated value entry
254  if (a->value == b->value) {
255  *dst = Entry(a->rmin + b->rmin,
256  a->rmax + b->rmax,
257  a->wmin + b->wmin, a->value);
258  aprev_rmin = a->RMinNext();
259  bprev_rmin = b->RMinNext();
260  ++dst; ++a; ++b;
261  } else if (a->value < b->value) {
262  *dst = Entry(a->rmin + bprev_rmin,
263  a->rmax + b->RMaxPrev(),
264  a->wmin, a->value);
265  aprev_rmin = a->RMinNext();
266  ++dst; ++a;
267  } else {
268  *dst = Entry(b->rmin + aprev_rmin,
269  b->rmax + a->RMaxPrev(),
270  b->wmin, b->value);
271  bprev_rmin = b->RMinNext();
272  ++dst; ++b;
273  }
274  }
275  if (a != a_end) {
276  RType brmax = (b_end - 1)->rmax;
277  do {
278  *dst = Entry(a->rmin + bprev_rmin, a->rmax + brmax, a->wmin, a->value);
279  ++dst; ++a;
280  } while (a != a_end);
281  }
282  if (b != b_end) {
283  RType armax = (a_end - 1)->rmax;
284  do {
285  *dst = Entry(b->rmin + aprev_rmin, b->rmax + armax, b->wmin, b->value);
286  ++dst; ++b;
287  } while (b != b_end);
288  }
289  this->size = dst - data;
290  const RType tol = 10;
291  RType err_mingap, err_maxgap, err_wgap;
292  this->FixError(&err_mingap, &err_maxgap, &err_wgap);
293  if (err_mingap > tol || err_maxgap > tol || err_wgap > tol) {
294  LOG(INFO) << "mingap=" << err_mingap
295  << ", maxgap=" << err_maxgap
296  << ", wgap=" << err_wgap;
297  }
298  CHECK(size <= sa.size + sb.size) << "bug in combine";
299  }
300  // helper function to print the current content of sketch
301  inline void Print() const {
302  for (size_t i = 0; i < this->size; ++i) {
303  LOG(CONSOLE) << "[" << i << "] rmin=" << data[i].rmin
304  << ", rmax=" << data[i].rmax
305  << ", wmin=" << data[i].wmin
306  << ", v=" << data[i].value;
307  }
308  }
309  // try to fix rounding error
310  // and re-establish invariance
311  inline void FixError(RType *err_mingap,
312  RType *err_maxgap,
313  RType *err_wgap) const {
314  *err_mingap = 0;
315  *err_maxgap = 0;
316  *err_wgap = 0;
317  RType prev_rmin = 0, prev_rmax = 0;
318  for (size_t i = 0; i < this->size; ++i) {
319  if (data[i].rmin < prev_rmin) {
320  data[i].rmin = prev_rmin;
321  *err_mingap = std::max(*err_mingap, prev_rmin - data[i].rmin);
322  } else {
323  prev_rmin = data[i].rmin;
324  }
325  if (data[i].rmax < prev_rmax) {
326  data[i].rmax = prev_rmax;
327  *err_maxgap = std::max(*err_maxgap, prev_rmax - data[i].rmax);
328  }
329  RType rmin_next = data[i].RMinNext();
330  if (data[i].rmax < rmin_next) {
331  data[i].rmax = rmin_next;
332  *err_wgap = std::max(*err_wgap, data[i].rmax - rmin_next);
333  }
334  prev_rmax = data[i].rmax;
335  }
336  }
337  // check consistency of the summary
338  inline bool Check(const char *msg) const {
339  const float tol = 10.0f;
340  for (size_t i = 0; i < this->size; ++i) {
341  if (data[i].rmin + data[i].wmin > data[i].rmax + tol ||
342  data[i].rmin < -1e-6f || data[i].rmax < -1e-6f) {
343  LOG(INFO) << "---------- WQSummary::Check did not pass ----------";
344  this->Print();
345  return false;
346  }
347  }
348  return true;
349  }
350 };
351 
353 template<typename DType, typename RType>
354 struct WXQSummary : public WQSummary<DType, RType> {
355  // redefine entry type
357  // constructor
359  : WQSummary<DType, RType>(data, size) {}
360  // check if the block is large chunk
361  inline static bool CheckLarge(const Entry &e, RType chunk) {
362  return e.RMinNext() > e.RMaxPrev() + chunk;
363  }
364  // set prune
365  inline void SetPrune(const WQSummary<DType, RType> &src, size_t maxsize) {
366  if (src.size <= maxsize) {
367  this->CopyFrom(src); return;
368  }
369  RType begin = src.data[0].rmax;
370  // n is number of points exclude the min/max points
371  size_t n = maxsize - 2, nbig = 0;
372  // these is the range of data exclude the min/max point
373  RType range = src.data[src.size - 1].rmin - begin;
374  // prune off zero weights
375  if (range == 0.0f || maxsize <= 2) {
376  // special case, contain only two effective data pts
377  this->data[0] = src.data[0];
378  this->data[1] = src.data[src.size - 1];
379  this->size = 2;
380  return;
381  } else {
382  range = std::max(range, static_cast<RType>(1e-3f));
383  }
384  // Get a big enough chunk size, bigger than range / n
385  // (multiply by 2 is a safe factor)
386  const RType chunk = 2 * range / n;
387  // minimized range
388  RType mrange = 0;
389  {
390  // first scan, grab all the big chunk
391  // moving block index, exclude the two ends.
392  size_t bid = 0;
393  for (size_t i = 1; i < src.size - 1; ++i) {
394  // detect big chunk data point in the middle
395  // always save these data points.
396  if (CheckLarge(src.data[i], chunk)) {
397  if (bid != i - 1) {
398  // accumulate the range of the rest points
399  mrange += src.data[i].RMaxPrev() - src.data[bid].RMinNext();
400  }
401  bid = i; ++nbig;
402  }
403  }
404  if (bid != src.size - 2) {
405  mrange += src.data[src.size-1].RMaxPrev() - src.data[bid].RMinNext();
406  }
407  }
408  // assert: there cannot be more than n big data points
409  if (nbig >= n) {
410  // see what was the case
411  LOG(INFO) << " check quantile stats, nbig=" << nbig << ", n=" << n;
412  LOG(INFO) << " srcsize=" << src.size << ", maxsize=" << maxsize
413  << ", range=" << range << ", chunk=" << chunk;
414  src.Print();
415  CHECK(nbig < n) << "quantile: too many large chunk";
416  }
417  this->data[0] = src.data[0];
418  this->size = 1;
419  // The counter on the rest of points, to be selected equally from small chunks.
420  n = n - nbig;
421  // find the rest of point
422  size_t bid = 0, k = 1, lastidx = 0;
423  for (size_t end = 1; end < src.size; ++end) {
424  if (end == src.size - 1 || CheckLarge(src.data[end], chunk)) {
425  if (bid != end - 1) {
426  size_t i = bid;
427  RType maxdx2 = src.data[end].RMaxPrev() * 2;
428  for (; k < n; ++k) {
429  RType dx2 = 2 * ((k * mrange) / n + begin);
430  if (dx2 >= maxdx2) break;
431  while (i < end &&
432  dx2 >= src.data[i + 1].rmax + src.data[i + 1].rmin) ++i;
433  if (i == end) break;
434  if (dx2 < src.data[i].RMinNext() + src.data[i + 1].RMaxPrev()) {
435  if (i != lastidx) {
436  this->data[this->size++] = src.data[i]; lastidx = i;
437  }
438  } else {
439  if (i + 1 != lastidx) {
440  this->data[this->size++] = src.data[i + 1]; lastidx = i + 1;
441  }
442  }
443  }
444  }
445  if (lastidx != end) {
446  this->data[this->size++] = src.data[end];
447  lastidx = end;
448  }
449  bid = end;
450  // shift base by the gap
451  begin += src.data[bid].RMinNext() - src.data[bid].RMaxPrev();
452  }
453  }
454  }
455 };
463 template<typename DType, typename RType, class TSummary>
465  public:
466  static float constexpr kFactor = 8.0;
467 
468  public:
470  using Summary = TSummary;
472  using Entry = typename Summary::Entry;
474  struct SummaryContainer : public Summary {
475  std::vector<Entry> space;
476  SummaryContainer(const SummaryContainer &src) : Summary(nullptr, src.size) {
477  this->space = src.space;
478  this->data = dmlc::BeginPtr(this->space);
479  }
480  SummaryContainer() : Summary(nullptr, 0) {
481  }
483  inline void Reserve(size_t size) {
484  if (size > space.size()) {
485  space.resize(size);
486  this->data = dmlc::BeginPtr(space);
487  }
488  }
495  inline void Reduce(const Summary &src, size_t max_nbyte) {
496  this->Reserve((max_nbyte - sizeof(this->size)) / sizeof(Entry));
497  SummaryContainer temp;
498  temp.Reserve(this->size + src.size);
499  temp.SetCombine(*this, src);
500  this->SetPrune(temp, space.size());
501  }
503  inline static size_t CalcMemCost(size_t nentry) {
504  return sizeof(size_t) + sizeof(Entry) * nentry;
505  }
507  template<typename TStream>
508  inline void Save(TStream &fo) const { // NOLINT(*)
509  fo.Write(&(this->size), sizeof(this->size));
510  if (this->size != 0) {
511  fo.Write(this->data, this->size * sizeof(Entry));
512  }
513  }
515  template<typename TStream>
516  inline void Load(TStream &fi) { // NOLINT(*)
517  CHECK_EQ(fi.Read(&this->size, sizeof(this->size)), sizeof(this->size));
518  this->Reserve(this->size);
519  if (this->size != 0) {
520  CHECK_EQ(fi.Read(this->data, this->size * sizeof(Entry)),
521  this->size * sizeof(Entry));
522  }
523  }
524  };
530  inline void Init(size_t maxn, double eps) {
531  LimitSizeLevel(maxn, eps, &nlevel, &limit_size);
532  // lazy reserve the space, if there is only one value, no need to allocate space
533  inqueue.queue.resize(1);
534  inqueue.qtail = 0;
535  data.clear();
536  level.clear();
537  }
538 
539  inline static void LimitSizeLevel
540  (size_t maxn, double eps, size_t* out_nlevel, size_t* out_limit_size) {
541  size_t& nlevel = *out_nlevel;
542  size_t& limit_size = *out_limit_size;
543  nlevel = 1;
544  while (true) {
545  limit_size = static_cast<size_t>(ceil(nlevel / eps)) + 1;
546  limit_size = std::min(maxn, limit_size);
547  size_t n = (1ULL << nlevel);
548  if (n * limit_size >= maxn) break;
549  ++nlevel;
550  }
551  // check invariant
552  size_t n = (1ULL << nlevel);
553  CHECK(n * limit_size >= maxn) << "invalid init parameter";
554  CHECK(nlevel <= std::max(static_cast<size_t>(1), static_cast<size_t>(limit_size * eps)))
555  << "invalid init parameter";
556  }
557 
563  inline void Push(DType x, RType w = 1) {
564  if (w == static_cast<RType>(0)) return;
565  if (inqueue.qtail == inqueue.queue.size()) {
566  // jump from lazy one value to limit_size * 2
567  if (inqueue.queue.size() == 1) {
568  inqueue.queue.resize(limit_size * 2);
569  } else {
570  temp.Reserve(limit_size * 2);
571  inqueue.MakeSummary(&temp);
572  // cleanup queue
573  inqueue.qtail = 0;
574  this->PushTemp();
575  }
576  }
577  inqueue.Push(x, w);
578  }
579 
580  inline void PushSummary(const Summary& summary) {
581  temp.Reserve(limit_size * 2);
582  temp.SetPrune(summary, limit_size * 2);
583  PushTemp();
584  }
585 
587  inline void PushTemp() {
588  temp.Reserve(limit_size * 2);
589  for (size_t l = 1; true; ++l) {
590  this->InitLevel(l + 1);
591  // check if level l is empty
592  if (level[l].size == 0) {
593  level[l].SetPrune(temp, limit_size);
594  break;
595  } else {
596  // level 0 is actually temp space
597  level[0].SetPrune(temp, limit_size);
598  temp.SetCombine(level[0], level[l]);
599  if (temp.size > limit_size) {
600  // try next level
601  level[l].size = 0;
602  } else {
603  // if merged record is still smaller, no need to send to next level
604  level[l].CopyFrom(temp); break;
605  }
606  }
607  }
608  }
610  inline void GetSummary(SummaryContainer *out) {
611  if (level.size() != 0) {
612  out->Reserve(limit_size * 2);
613  } else {
614  out->Reserve(inqueue.queue.size());
615  }
616  inqueue.MakeSummary(out);
617  if (level.size() != 0) {
618  level[0].SetPrune(*out, limit_size);
619  for (size_t l = 1; l < level.size(); ++l) {
620  if (level[l].size == 0) continue;
621  if (level[0].size == 0) {
622  level[0].CopyFrom(level[l]);
623  } else {
624  out->SetCombine(level[0], level[l]);
625  level[0].SetPrune(*out, limit_size);
626  }
627  }
628  out->CopyFrom(level[0]);
629  } else {
630  if (out->size > limit_size) {
631  temp.Reserve(limit_size);
632  temp.SetPrune(*out, limit_size);
633  out->CopyFrom(temp);
634  }
635  }
636  }
637  // used for debug, check if the sketch is valid
638  inline void CheckValid(RType eps) const {
639  for (size_t l = 1; l < level.size(); ++l) {
640  level[l].CheckValid(eps);
641  }
642  }
643  // initialize level space to at least nlevel
644  inline void InitLevel(size_t nlevel) {
645  if (level.size() >= nlevel) return;
646  data.resize(limit_size * nlevel);
647  level.resize(nlevel, Summary(nullptr, 0));
648  for (size_t l = 0; l < level.size(); ++l) {
649  level[l].data = dmlc::BeginPtr(data) + l * limit_size;
650  }
651  }
652  // input data queue
653  typename Summary::Queue inqueue;
654  // number of levels
655  size_t nlevel;
656  // size of summary in each level
657  size_t limit_size;
658  // the level of each summaries
659  std::vector<Summary> level;
660  // content of the summary
661  std::vector<Entry> data;
662  // temporal summary, used for temp-merge
663  SummaryContainer temp;
664 };
665 
671 template<typename DType, typename RType = unsigned>
673  public QuantileSketchTemplate<DType, RType, WQSummary<DType, RType> > {
674 };
675 
681 template<typename DType, typename RType = unsigned>
683  public QuantileSketchTemplate<DType, RType, WXQSummary<DType, RType> > {
684 };
685 } // namespace common
686 } // namespace xgboost
687 #endif // XGBOOST_COMMON_QUANTILE_H_
void MakeSummary(WQSummary *out)
Definition: quantile.h:97
WQSummary(Entry *data, size_t size)
Definition: quantile.h:119
SummaryContainer(const SummaryContainer &src)
Definition: quantile.h:476
void SetCombine(const WQSummary &sa, const WQSummary &sb)
set current summary to be merged summary of sa and sb
Definition: quantile.h:238
typename WQSummary< DType, RType >::Entry Entry
Definition: quantile.h:356
bool operator<(const QEntry &b) const
Definition: quantile.h:81
std::vector< QEntry > queue
Definition: quantile.h:86
size_t qtail
Definition: quantile.h:88
WXQSummary(Entry *data, size_t size)
Definition: quantile.h:358
std::vector< Entry > space
Definition: quantile.h:475
size_t nlevel
Definition: quantile.h:655
RType MaxRank() const
Definition: quantile.h:158
void Reserve(size_t size)
reserve space for summary
Definition: quantile.h:483
void Save(TStream &fo) const
save the data structure into stream
Definition: quantile.h:508
SummaryContainer temp
Definition: quantile.h:663
template for all quantile sketch algorithm that uses merge/prune scheme
Definition: quantile.h:464
static size_t CalcMemCost(size_t nentry)
return the number of bytes this data structure cost in serialization
Definition: quantile.h:503
experimental wsummary
Definition: quantile.h:26
void CheckValid(RType eps) const
debug function, validate whether the summary run consistency check to check if it is a valid summary ...
Definition: quantile.h:186
void Push(DType x, RType w)
Definition: quantile.h:90
typename Summary::Entry Entry
the entry type
Definition: quantile.h:472
void Push(DType x, RType w=1)
add an element to a sketch
Definition: quantile.h:563
DType value
the value of data
Definition: quantile.h:36
void Init(size_t maxn, double eps)
initialize the quantile sketch, given the performance specification
Definition: quantile.h:530
RType wmin
maximum weight
Definition: quantile.h:34
bool Check(const char *msg) const
Definition: quantile.h:338
void CopyFrom(const WQSummary &src)
copy content from src
Definition: quantile.h:165
Entry Query(DType qvalue, size_t &istart) const
query qvalue, start from istart
Definition: quantile.h:137
size_t limit_size
Definition: quantile.h:657
void FixError(RType *err_mingap, RType *err_maxgap, RType *err_wgap) const
Definition: quantile.h:311
Quantile sketch use WXQSummary.
Definition: quantile.h:682
size_t size
number of elements in the summary
Definition: quantile.h:117
static bool CheckLarge(const Entry &e, RType chunk)
Definition: quantile.h:361
RType weight
Definition: quantile.h:74
void SetPrune(const WQSummary< DType, RType > &src, size_t maxsize)
Definition: quantile.h:365
XGBOOST_DEVICE RType RMinNext() const
Definition: quantile.h:51
Summary::Queue inqueue
Definition: quantile.h:653
Quantile sketch use WQSummary.
Definition: quantile.h:672
void PushTemp()
push up temp
Definition: quantile.h:587
void SetPrune(const WQSummary &src, size_t maxsize)
set current summary to be pruned summary of src assume data field is already allocated to be at least...
Definition: quantile.h:202
void Load(TStream &fi)
load data structure from input stream
Definition: quantile.h:516
input data queue before entering the summary
Definition: quantile.h:68
void PushSummary(const Summary &summary)
Definition: quantile.h:580
QEntry(DType value, RType weight)
Definition: quantile.h:78
#define XGBOOST_DEVICE
Tag function as usable by device.
Definition: base.h:84
same as summary, but use STL to backup the space
Definition: quantile.h:474
DType value
Definition: quantile.h:72
void InitLevel(size_t nlevel)
Definition: quantile.h:644
std::vector< Entry > data
Definition: quantile.h:661
RType MaxError() const
Definition: quantile.h:124
namespace of xgboost
Definition: base.h:102
RType rmax
maximum rank
Definition: quantile.h:32
void GetSummary(SummaryContainer *out)
get the summary after finalize
Definition: quantile.h:610
void MakeFromSorted(const Entry *entries, size_t n)
Definition: quantile.h:169
void CheckValid(RType eps) const
Definition: quantile.h:638
std::vector< Summary > level
Definition: quantile.h:659
void Reduce(const Summary &src, size_t max_nbyte)
do elementwise combination of summary array this[i] = combine(this[i], src[i]) for each i ...
Definition: quantile.h:495
XGBOOST_DEVICE RType RMaxPrev() const
Definition: quantile.h:55
void CheckValid(RType eps=0) const
debug function, check Valid
Definition: quantile.h:46
void Print() const
Definition: quantile.h:301
try to do efficient pruning
Definition: quantile.h:354
XGBOOST_DEVICE Entry(RType rmin, RType rmax, RType wmin, DType value)
Definition: quantile.h:40
friend std::ostream & operator<<(std::ostream &os, Entry const &e)
Definition: quantile.h:59
XGBOOST_DEVICE Entry()
Definition: quantile.h:38
Entry * data
data field
Definition: quantile.h:115
an entry in the sketch summary
Definition: quantile.h:28
TSummary Summary
type of summary type
Definition: quantile.h:470
RType rmin
minimum rank
Definition: quantile.h:30