xgboost
group_data.h
Go to the documentation of this file.
1 
14 #ifndef XGBOOST_COMMON_GROUP_DATA_H_
15 #define XGBOOST_COMMON_GROUP_DATA_H_
16 
17 #include <vector>
18 #include <algorithm>
19 
20 #include "xgboost/base.h"
21 
22 namespace xgboost {
23 namespace common {
29 template<typename ValueType, typename SizeType = bst_ulong>
31  public:
42  ParallelGroupBuilder(std::vector<SizeType> *p_rptr,
43  std::vector<ValueType> *p_data,
44  size_t base_row_offset = 0)
45  : rptr_(*p_rptr),
46  data_(*p_data),
47  thread_rptr_(tmp_thread_rptr_),
48  base_row_offset_(base_row_offset) {}
49  ParallelGroupBuilder(std::vector<SizeType> *p_rptr,
50  std::vector<ValueType> *p_data,
51  std::vector<std::vector<SizeType> > *p_thread_rptr,
52  size_t base_row_offset = 0)
53  : rptr_(*p_rptr),
54  data_(*p_data),
55  thread_rptr_(*p_thread_rptr),
56  base_row_offset_(base_row_offset) {}
57 
64  inline void InitBudget(std::size_t max_key, int nthread) {
65  thread_rptr_.resize(nthread);
66  for (std::size_t i = 0; i < thread_rptr_.size(); ++i) {
67  thread_rptr_[i].resize(max_key - std::min(base_row_offset_, max_key));
68  std::fill(thread_rptr_[i].begin(), thread_rptr_[i].end(), 0);
69  }
70  }
77  inline void AddBudget(std::size_t key, int threadid, SizeType nelem = 1) {
78  std::vector<SizeType> &trptr = thread_rptr_[threadid];
79  size_t offset_key = key - base_row_offset_;
80  if (trptr.size() < offset_key + 1) {
81  trptr.resize(offset_key + 1, 0);
82  }
83  trptr[offset_key] += nelem;
84  }
86  inline void InitStorage() {
87  // set rptr to correct size
88  SizeType rptr_fill_value = rptr_.empty() ? 0 : rptr_.back();
89  for (std::size_t tid = 0; tid < thread_rptr_.size(); ++tid) {
90  if (rptr_.size() <= thread_rptr_[tid].size() + base_row_offset_) {
91  rptr_.resize(thread_rptr_[tid].size() + base_row_offset_ + 1,
92  rptr_fill_value); // key + 1
93  }
94  }
95  // initialize rptr to be beginning of each segment
96  std::size_t count = 0;
97  for (std::size_t i = base_row_offset_; i + 1 < rptr_.size(); ++i) {
98  for (std::size_t tid = 0; tid < thread_rptr_.size(); ++tid) {
99  std::vector<SizeType> &trptr = thread_rptr_[tid];
100  if (i < trptr.size() +
101  base_row_offset_) { // i^th row is assigned for this thread
102  std::size_t thread_count =
103  trptr[i - base_row_offset_]; // how many entries in this row
104  trptr[i - base_row_offset_] = count + rptr_.back();
105  count += thread_count;
106  }
107  }
108  rptr_[i + 1] += count; // pointer accumulated from all thread
109  }
110  data_.resize(rptr_.back());
111  }
120  void Push(std::size_t key, ValueType value, int threadid) {
121  size_t offset_key = key - base_row_offset_;
122  SizeType &rp = thread_rptr_[threadid][offset_key];
123  data_[rp++] = value;
124  }
125 
126  private:
// Row-pointer vector handed in through the constructor; InitStorage()
// extends it and accumulates the per-row entry counts into it.
128  std::vector<SizeType> &rptr_;
// Value vector handed in through the constructor; InitStorage() resizes it
// and Push() writes into it.
130  std::vector<ValueType> &data_;
// Per-thread buffers in use: either the caller-supplied ones or
// tmp_thread_rptr_ below, depending on the constructor. They hold budgets
// after AddBudget() and write positions after InitStorage().
132  std::vector<std::vector<SizeType> > &thread_rptr_;
// Builder-owned per-thread buffers that thread_rptr_ refers to when the
// caller supplies none.
134  std::vector<std::vector<SizeType> > tmp_thread_rptr_;
// Subtracted from every key before the per-thread buffers are indexed.
136  size_t base_row_offset_;
137 };
138 } // namespace common
139 } // namespace xgboost
140 #endif // XGBOOST_COMMON_GROUP_DATA_H_
multi-thread version of group builder
Definition: group_data.h:30
ParallelGroupBuilder(std::vector< SizeType > *p_rptr, std::vector< ValueType > *p_data, size_t base_row_offset=0)
parallel group builder of data.
Definition: group_data.h:42
void InitBudget(std::size_t max_key, int nthread)
step 1: initialize the helper, with a hint of the number of keys and threads used in the construction ...
Definition: group_data.h:64
ParallelGroupBuilder(std::vector< SizeType > *p_rptr, std::vector< ValueType > *p_data, std::vector< std::vector< SizeType > > *p_thread_rptr, size_t base_row_offset=0)
Definition: group_data.h:49
namespace of xgboost
Definition: base.h:102
defines configuration macros of xgboost.
void InitStorage()
step 3: initialize the necessary storage
Definition: group_data.h:86
void Push(std::size_t key, ValueType value, int threadid)
step 4: add data to the allocated space; the calls to this function should exactly match previous ...
Definition: group_data.h:120
void AddBudget(std::size_t key, int threadid, SizeType nelem=1)
step 2: add budget to each key
Definition: group_data.h:77