xgboost
group_data.h
Go to the documentation of this file.
1 
14 #ifndef XGBOOST_COMMON_GROUP_DATA_H_
15 #define XGBOOST_COMMON_GROUP_DATA_H_
16 
17 #include <cstddef>
18 #include <vector>
19 #include <algorithm>
20 
21 #include "xgboost/base.h"
22 
23 namespace xgboost {
24 namespace common {
30 template<typename ValueType, typename SizeType = bst_ulong>
32  public:
43  ParallelGroupBuilder(std::vector<SizeType> *p_rptr,
44  std::vector<ValueType> *p_data,
45  size_t base_row_offset = 0)
46  : rptr_(*p_rptr),
47  data_(*p_data),
48  base_row_offset_(base_row_offset) {}
49 
56  void InitBudget(std::size_t max_key, int nthread) {
57  thread_rptr_.resize(nthread);
58  for (std::size_t i = 0; i < thread_rptr_.size(); ++i) {
59  thread_rptr_[i].resize(max_key - std::min(base_row_offset_, max_key));
60  std::fill(thread_rptr_[i].begin(), thread_rptr_[i].end(), 0);
61  }
62  }
69  void AddBudget(std::size_t key, int threadid, SizeType nelem = 1) {
70  std::vector<SizeType> &trptr = thread_rptr_[threadid];
71  size_t offset_key = key - base_row_offset_;
72  if (trptr.size() < offset_key + 1) {
73  trptr.resize(offset_key + 1, 0);
74  }
75  trptr[offset_key] += nelem;
76  }
78  inline void InitStorage() {
79  // set rptr to correct size
80  SizeType rptr_fill_value = rptr_.empty() ? 0 : rptr_.back();
81  for (std::size_t tid = 0; tid < thread_rptr_.size(); ++tid) {
82  if (rptr_.size() <= thread_rptr_[tid].size() + base_row_offset_) {
83  rptr_.resize(thread_rptr_[tid].size() + base_row_offset_ + 1,
84  rptr_fill_value); // key + 1
85  }
86  }
87  // initialize rptr to be beginning of each segment
88  std::size_t count = 0;
89  for (std::size_t i = base_row_offset_; i + 1 < rptr_.size(); ++i) {
90  for (std::size_t tid = 0; tid < thread_rptr_.size(); ++tid) {
91  std::vector<SizeType> &trptr = thread_rptr_[tid];
92  if (i < trptr.size() +
93  base_row_offset_) { // i^th row is assigned for this thread
94  std::size_t thread_count =
95  trptr[i - base_row_offset_]; // how many entries in this row
96  trptr[i - base_row_offset_] = count + rptr_.back();
97  count += thread_count;
98  }
99  }
100  rptr_[i + 1] += count; // pointer accumulated from all thread
101  }
102  data_.resize(rptr_.back());
103  }
112  void Push(std::size_t key, ValueType value, int threadid) {
113  size_t offset_key = key - base_row_offset_;
114  SizeType &rp = thread_rptr_[threadid][offset_key];
115  data_[rp++] = value;
116  }
117 
118  private:
// Caller-owned row-pointer vector (bound from *p_rptr in the constructor);
// InitStorage extends it and adds the accumulated entry counts to it.
120  std::vector<SizeType> &rptr_;
// Caller-owned value vector (bound from *p_data in the constructor);
// InitStorage resizes it to rptr_.back() and Push writes into it.
122  std::vector<ValueType> &data_;
// One vector per thread, indexed by (key - base_row_offset_). Holds entry
// counts after AddBudget and write positions into data_ after InitStorage.
124  std::vector<std::vector<SizeType> > thread_rptr_;
// Offset subtracted from every key before indexing thread_rptr_.
126  size_t base_row_offset_;
127 };
128 } // namespace common
129 } // namespace xgboost
130 #endif // XGBOOST_COMMON_GROUP_DATA_H_
multi-thread version of group builder
Definition: group_data.h:31
ParallelGroupBuilder(std::vector< SizeType > *p_rptr, std::vector< ValueType > *p_data, size_t base_row_offset=0)
parallel group builder of data.
Definition: group_data.h:43
void InitBudget(std::size_t max_key, int nthread)
step 1: initialize the helper with a hint of the number of keys and threads used in the construction ...
Definition: group_data.h:56
namespace of xgboost
Definition: base.h:102
defines configuration macros of xgboost.
void InitStorage()
step 3: initialize the necessary storage
Definition: group_data.h:78
void Push(std::size_t key, ValueType value, int threadid)
step 4: add data to the allocated space; the calls to this function should exactly match the previous ...
Definition: group_data.h:112
void AddBudget(std::size_t key, int threadid, SizeType nelem=1)
step 2: add budget to each key
Definition: group_data.h:69