xgboost
group_data.h
Go to the documentation of this file.
1 
14 #ifndef XGBOOST_COMMON_GROUP_DATA_H_
15 #define XGBOOST_COMMON_GROUP_DATA_H_
16 
17 #include <vector>
18 
19 namespace xgboost {
20 namespace common {
/*!
 * \brief multi-thread version of group builder
 *
 * Builds a grouped (row-pointer + flat data) layout in two passes:
 *   step 1: InitBudget  - size the per-thread counters
 *   step 2: AddBudget   - count how many elements each (thread, key) will push
 *   step 3: InitStorage - turn the counts into write offsets and size the output
 *   step 4: Push        - write the elements into their reserved slots
 *
 * Counters are kept per thread id, and InitStorage hands every (key, thread)
 * pair its own disjoint slice of the data vector, so a Push made with a given
 * threadid only touches that thread's counters and slices.
 *
 * \tparam ValueType type of the elements stored in the data vector
 * \tparam SizeType type used for counts and offsets
 */
template<typename ValueType, typename SizeType = std::size_t>
struct ParallelGroupBuilder {
 public:
  /*!
   * \brief construct a builder that uses its own internal per-thread scratch
   * \param p_rptr output row pointer vector; must outlive the builder
   * \param p_data output data vector; must outlive the builder
   */
  ParallelGroupBuilder(std::vector<SizeType> *p_rptr,
                       std::vector<ValueType> *p_data)
      : rptr_(*p_rptr), data_(*p_data), thread_rptr_(tmp_thread_rptr_) {
  }
  /*!
   * \brief construct a builder that uses caller-provided per-thread scratch
   * \param p_rptr output row pointer vector; must outlive the builder
   * \param p_data output data vector; must outlive the builder
   * \param p_thread_rptr per-thread counter storage; must outlive the builder
   */
  ParallelGroupBuilder(std::vector<SizeType> *p_rptr,
                       std::vector<ValueType> *p_data,
                       std::vector< std::vector<SizeType> > *p_thread_rptr)
      : rptr_(*p_rptr), data_(*p_data), thread_rptr_(*p_thread_rptr) {
  }
  /*!
   * \brief copy constructor
   *
   * The implicitly generated copy would copy the thread_rptr_ reference as-is.
   * When the source uses its own internal scratch, that reference points into
   * the source object and dangles once the source is destroyed. Rebind to this
   * object's own (copied) scratch in that case; an externally supplied scratch
   * stays shared, as before.
   */
  ParallelGroupBuilder(const ParallelGroupBuilder &other)
      : rptr_(other.rptr_), data_(other.data_),
        thread_rptr_(other.UsesInternalScratch() ?
                     tmp_thread_rptr_ : other.thread_rptr_),
        tmp_thread_rptr_(other.tmp_thread_rptr_) {
  }

 public:
  /*!
   * \brief step 1: initialize the helper, with hint of number keys
   *  and thread used in the construction
   * \param nkeys number of keys; every thread's counters are reset to
   *  nkeys zeros (AddBudget may still grow them for larger keys)
   * \param nthread number of threads that will call AddBudget / Push
   */
  void InitBudget(std::size_t nkeys, int nthread) {
    thread_rptr_.resize(nthread);
    for (std::size_t tid = 0; tid < thread_rptr_.size(); ++tid) {
      // assign() both resizes and zero-fills, so no <algorithm> is needed
      thread_rptr_[tid].assign(nkeys, static_cast<SizeType>(0));
    }
  }
  /*!
   * \brief step 2: add budget to each key
   * \param key the key to reserve space for
   * \param threadid id of the thread that will later push these elements
   * \param nelem number of elements to reserve for this (thread, key)
   */
  void AddBudget(std::size_t key, int threadid, SizeType nelem = 1) {
    std::vector<SizeType> &trptr = thread_rptr_[threadid];
    // grow this thread's counters when key exceeds the hint from InitBudget
    if (trptr.size() < key + 1) {
      trptr.resize(key + 1, 0);
    }
    trptr[key] += nelem;
  }
  /*!
   * \brief step 3: initialize the necessary storage
   *
   * Converts the per-thread counts into write offsets, fills the row pointer
   * vector and resizes the data vector to the total element count.
   */
  void InitStorage() {
    // set rptr to correct size: (largest key count over all threads) + 1
    for (std::size_t tid = 0; tid < thread_rptr_.size(); ++tid) {
      if (rptr_.size() <= thread_rptr_[tid].size()) {
        rptr_.resize(thread_rptr_[tid].size() + 1);  // key + 1
      }
    }
    // data_ is always laid out from index 0, so the first row pointer must be
    // 0 even if the caller handed in a vector with stale content
    if (!rptr_.empty()) {
      rptr_[0] = 0;
    }
    // initialize rptr to be beginning of each segment
    std::size_t start = 0;
    for (std::size_t i = 0; i + 1 < rptr_.size(); ++i) {
      for (std::size_t tid = 0; tid < thread_rptr_.size(); ++tid) {
        std::vector<SizeType> &trptr = thread_rptr_[tid];
        if (i < trptr.size()) {  // i^th row is assigned for this thread
          const std::size_t ncnt = trptr[i];  // how many entries in this row
          // the counter now becomes this thread's write cursor for row i
          trptr[i] = static_cast<SizeType>(start);
          start += ncnt;
        }
      }
      rptr_[i + 1] = static_cast<SizeType>(start);  // accumulated from all threads
    }
    data_.resize(start);
  }
  /*!
   * \brief step 4: add data to the allocated space
   *
   * Writes value at the current cursor of (threadid, key) and advances the
   * cursor. No bounds are checked, so the pushes must stay within the budget
   * registered through AddBudget for the same (threadid, key).
   * \param key the key of the group
   * \param value the element to store
   * \param threadid id of the calling thread
   */
  void Push(std::size_t key, ValueType value, int threadid) {
    SizeType &rp = thread_rptr_[threadid][key];
    data_[rp++] = value;
  }

 private:
  /*! \brief whether thread_rptr_ is bound to this object's own scratch */
  bool UsesInternalScratch() const {
    return &thread_rptr_ == &tmp_thread_rptr_;
  }
  /*! \brief pointer to the beginning and end of each continuous key */
  std::vector<SizeType> &rptr_;
  /*! \brief index of nonzero entries in each row */
  std::vector<ValueType> &data_;
  /*! \brief per-thread counters, turned into write cursors by InitStorage */
  std::vector<std::vector<SizeType> > &thread_rptr_;
  /*! \brief internal scratch used when the caller supplies none */
  std::vector<std::vector<SizeType> > tmp_thread_rptr_;
};
113 } // namespace common
114 } // namespace xgboost
115 #endif // XGBOOST_COMMON_GROUP_DATA_H_
multi-thread version of group builder
Definition: group_data.h:27
void Push(std::size_t key, ValueType value, int threadid)
step 4: add data to the allocated space; the calls to this function should exactly match previous ...
Definition: group_data.h:98
ParallelGroupBuilder(std::vector< SizeType > *p_rptr, std::vector< ValueType > *p_data, std::vector< std::vector< SizeType > > *p_thread_rptr)
Definition: group_data.h:34
void InitBudget(std::size_t nkeys, int nthread)
step 1: initialize the helper, with a hint of the number of keys and threads used in the construction ...
Definition: group_data.h:47
namespace of xgboost
Definition: base.h:79
void AddBudget(std::size_t key, int threadid, SizeType nelem=1)
step 2: add budget to each key
Definition: group_data.h:60
ParallelGroupBuilder(std::vector< SizeType > *p_rptr, std::vector< ValueType > *p_data)
Definition: group_data.h:30
void InitStorage()
step 3: initialize the necessary storage
Definition: group_data.h:68