xgboost
group_data.h
Go to the documentation of this file.
1 
14 #ifndef XGBOOST_COMMON_GROUP_DATA_H_
15 #define XGBOOST_COMMON_GROUP_DATA_H_
16 
17 #include <cstddef>
18 #include <vector>
19 #include <algorithm>
20 #include <utility>
21 
22 #include "xgboost/base.h"
23 
24 namespace xgboost {
25 namespace common {
31 template<typename ValueType, typename SizeType = bst_ulong>
33  public:
44  ParallelGroupBuilder(std::vector<SizeType> *p_rptr,
45  std::vector<ValueType> *p_data,
46  size_t base_row_offset = 0)
47  : rptr_(*p_rptr),
48  data_(*p_data),
49  base_row_offset_(base_row_offset) {}
50 
57  void InitBudget(std::size_t max_key, int nthread) {
58  thread_rptr_.resize(nthread);
59  for (std::size_t i = 0; i < thread_rptr_.size(); ++i) {
60  thread_rptr_[i].resize(max_key - std::min(base_row_offset_, max_key), 0);
61  }
62  }
63 
70  void AddBudget(std::size_t key, int threadid, SizeType nelem = 1) {
71  std::vector<SizeType> &trptr = thread_rptr_[threadid];
72  size_t offset_key = key - base_row_offset_;
73  if (trptr.size() < offset_key + 1) {
74  trptr.resize(offset_key + 1, 0);
75  }
76  trptr[offset_key] += nelem;
77  }
78 
80  inline void InitStorage() {
81  // set rptr to correct size
82  SizeType rptr_fill_value = rptr_.empty() ? 0 : rptr_.back();
83  for (std::size_t tid = 0; tid < thread_rptr_.size(); ++tid) {
84  if (rptr_.size() <= thread_rptr_[tid].size() + base_row_offset_) {
85  rptr_.resize(thread_rptr_[tid].size() + base_row_offset_ + 1,
86  rptr_fill_value); // key + 1
87  }
88  }
89  // initialize rptr to be beginning of each segment
90  std::size_t count = 0;
91  for (std::size_t i = base_row_offset_; i + 1 < rptr_.size(); ++i) {
92  for (std::size_t tid = 0; tid < thread_rptr_.size(); ++tid) {
93  std::vector<SizeType> &trptr = thread_rptr_[tid];
94  if (i < trptr.size() +
95  base_row_offset_) { // i^th row is assigned for this thread
96  std::size_t thread_count =
97  trptr[i - base_row_offset_]; // how many entries in this row
98  trptr[i - base_row_offset_] = count + rptr_.back();
99  count += thread_count;
100  }
101  }
102  rptr_[i + 1] += count; // pointer accumulated from all thread
103  }
104  data_.resize(rptr_.back());
105  }
106 
115  void Push(std::size_t key, ValueType&& value, int threadid) {
116  size_t offset_key = key - base_row_offset_;
117  SizeType &rp = thread_rptr_[threadid][offset_key];
118  data_[rp++] = std::move(value);
119  }
120 
121  private:
123  std::vector<SizeType> &rptr_;
125  std::vector<ValueType> &data_;
127  std::vector<std::vector<SizeType> > thread_rptr_;
129  size_t base_row_offset_;
130 };
131 } // namespace common
132 } // namespace xgboost
133 #endif // XGBOOST_COMMON_GROUP_DATA_H_
xgboost::common::ParallelGroupBuilder::InitStorage
void InitStorage()
step 3: initialize the necessary storage
Definition: group_data.h:80
xgboost::common::ParallelGroupBuilder::InitBudget
void InitBudget(std::size_t max_key, int nthread)
step 1: initialize the helper, with a hint of the number of keys and threads used in the construction
Definition: group_data.h:57
base.h
defines configuration macros of xgboost.
xgboost::common::ParallelGroupBuilder
multi-thread version of group builder
Definition: group_data.h:32
xgboost::common::ParallelGroupBuilder::AddBudget
void AddBudget(std::size_t key, int threadid, SizeType nelem=1)
step 2: add budget to each key
Definition: group_data.h:70
xgboost::common::ParallelGroupBuilder::ParallelGroupBuilder
ParallelGroupBuilder(std::vector< SizeType > *p_rptr, std::vector< ValueType > *p_data, size_t base_row_offset=0)
parallel group builder of data.
Definition: group_data.h:44
xgboost::common::ParallelGroupBuilder::Push
void Push(std::size_t key, ValueType &&value, int threadid)
step 4: add data to the allocated space; the calls to this function should exactly match the previous ...
Definition: group_data.h:115
xgboost
namespace of xgboost
Definition: base.h:110