xgboost
partition_builder.h
Go to the documentation of this file.
1 
8 #ifndef XGBOOST_COMMON_PARTITION_BUILDER_H_
9 #define XGBOOST_COMMON_PARTITION_BUILDER_H_
10 
11 #include <xgboost/data.h>
12 #include <algorithm>
13 #include <vector>
14 #include <utility>
15 #include <memory>
16 #include "xgboost/tree_model.h"
17 #include "../common/column_matrix.h"
18 
19 namespace xgboost {
20 namespace common {
21 
22 // The builder is required for samples partition to left and rights children for set of nodes
23 // Responsible for:
24 // 1) Effective memory allocation for intermediate results for multi-thread work
25 // 2) Merging partial results produced by threads into original row set (row_set_collection_)
26 // BlockSize is template to enable memory alignment easily with C++11 'alignas()' feature
27 template<size_t BlockSize>
29  public:
30  template<typename Func>
31  void Init(const size_t n_tasks, size_t n_nodes, Func funcNTask) {
32  left_right_nodes_sizes_.resize(n_nodes);
33  blocks_offsets_.resize(n_nodes+1);
34 
35  blocks_offsets_[0] = 0;
36  for (size_t i = 1; i < n_nodes+1; ++i) {
37  blocks_offsets_[i] = blocks_offsets_[i-1] + funcNTask(i-1);
38  }
39 
40  if (n_tasks > max_n_tasks_) {
41  mem_blocks_.resize(n_tasks);
42  max_n_tasks_ = n_tasks;
43  }
44  }
45 
46  // split row indexes (rid_span) to 2 parts (left_part, right_part) depending
47  // on comparison of indexes values (idx_span) and split point (split_cond)
48  // Handle dense columns
49  // Analog of std::stable_partition, but in no-inplace manner
50  template <bool default_left, bool any_missing, typename ColumnType>
51  inline std::pair<size_t, size_t> PartitionKernel(const ColumnType& column,
52  common::Span<const size_t> rid_span, const int32_t split_cond,
53  common::Span<size_t> left_part, common::Span<size_t> right_part) {
54  size_t* p_left_part = left_part.data();
55  size_t* p_right_part = right_part.data();
56  size_t nleft_elems = 0;
57  size_t nright_elems = 0;
58  auto state = column.GetInitialState(rid_span.front());
59 
60  for (auto rid : rid_span) {
61  const int32_t bin_id = column.GetBinIdx(rid, &state);
62  if (any_missing && bin_id == ColumnType::kMissingId) {
63  if (default_left) {
64  p_left_part[nleft_elems++] = rid;
65  } else {
66  p_right_part[nright_elems++] = rid;
67  }
68  } else {
69  if (bin_id <= split_cond) {
70  p_left_part[nleft_elems++] = rid;
71  } else {
72  p_right_part[nright_elems++] = rid;
73  }
74  }
75  }
76 
77  return {nleft_elems, nright_elems};
78  }
79 
80 
81  template <typename BinIdxType, bool any_missing>
82  void Partition(const size_t node_in_set, const size_t nid, const common::Range1d range,
83  const int32_t split_cond,
84  const ColumnMatrix& column_matrix, const RegTree& tree, const size_t* rid) {
85  common::Span<const size_t> rid_span(rid + range.begin(), rid + range.end());
86  common::Span<size_t> left = GetLeftBuffer(node_in_set,
87  range.begin(), range.end());
88  common::Span<size_t> right = GetRightBuffer(node_in_set,
89  range.begin(), range.end());
90  const bst_uint fid = tree[nid].SplitIndex();
91  const bool default_left = tree[nid].DefaultLeft();
92  const auto column_ptr = column_matrix.GetColumn<BinIdxType, any_missing>(fid);
93 
94  std::pair<size_t, size_t> child_nodes_sizes;
95 
96  if (column_ptr->GetType() == xgboost::common::kDenseColumn) {
98  static_cast<const common::DenseColumn<BinIdxType, any_missing>& >(*(column_ptr.get()));
99  if (default_left) {
100  child_nodes_sizes = PartitionKernel<true, any_missing>(column, rid_span,
101  split_cond, left, right);
102  } else {
103  child_nodes_sizes = PartitionKernel<false, any_missing>(column, rid_span,
104  split_cond, left, right);
105  }
106  } else {
107  CHECK_EQ(any_missing, true);
109  = static_cast<const common::SparseColumn<BinIdxType>& >(*(column_ptr.get()));
110  if (default_left) {
111  child_nodes_sizes = PartitionKernel<true, any_missing>(column, rid_span,
112  split_cond, left, right);
113  } else {
114  child_nodes_sizes = PartitionKernel<false, any_missing>(column, rid_span,
115  split_cond, left, right);
116  }
117  }
118 
119  const size_t n_left = child_nodes_sizes.first;
120  const size_t n_right = child_nodes_sizes.second;
121 
122  SetNLeftElems(node_in_set, range.begin(), range.end(), n_left);
123  SetNRightElems(node_in_set, range.begin(), range.end(), n_right);
124  }
125 
126 
127  // allocate thread local memory, should be called for each specific task
128  void AllocateForTask(size_t id) {
129  if (mem_blocks_[id].get() == nullptr) {
130  BlockInfo* local_block_ptr = new BlockInfo;
131  CHECK_NE(local_block_ptr, (BlockInfo*)nullptr);
132  mem_blocks_[id].reset(local_block_ptr);
133  }
134  }
135 
136  common::Span<size_t> GetLeftBuffer(int nid, size_t begin, size_t end) {
137  const size_t task_idx = GetTaskIdx(nid, begin);
138  return { mem_blocks_.at(task_idx)->Left(), end - begin };
139  }
140 
141  common::Span<size_t> GetRightBuffer(int nid, size_t begin, size_t end) {
142  const size_t task_idx = GetTaskIdx(nid, begin);
143  return { mem_blocks_.at(task_idx)->Right(), end - begin };
144  }
145 
146  void SetNLeftElems(int nid, size_t begin, size_t end, size_t n_left) {
147  size_t task_idx = GetTaskIdx(nid, begin);
148  mem_blocks_.at(task_idx)->n_left = n_left;
149  }
150 
151  void SetNRightElems(int nid, size_t begin, size_t end, size_t n_right) {
152  size_t task_idx = GetTaskIdx(nid, begin);
153  mem_blocks_.at(task_idx)->n_right = n_right;
154  }
155 
156 
157  size_t GetNLeftElems(int nid) const {
158  return left_right_nodes_sizes_[nid].first;
159  }
160 
161  size_t GetNRightElems(int nid) const {
162  return left_right_nodes_sizes_[nid].second;
163  }
164 
165  // Each thread has partial results for some set of tree-nodes
166  // The function decides order of merging partial results into final row set
168  for (size_t i = 0; i < blocks_offsets_.size()-1; ++i) {
169  size_t n_left = 0;
170  for (size_t j = blocks_offsets_[i]; j < blocks_offsets_[i+1]; ++j) {
171  mem_blocks_[j]->n_offset_left = n_left;
172  n_left += mem_blocks_[j]->n_left;
173  }
174  size_t n_right = 0;
175  for (size_t j = blocks_offsets_[i]; j < blocks_offsets_[i+1]; ++j) {
176  mem_blocks_[j]->n_offset_right = n_left + n_right;
177  n_right += mem_blocks_[j]->n_right;
178  }
179  left_right_nodes_sizes_[i] = {n_left, n_right};
180  }
181  }
182 
183  void MergeToArray(int nid, size_t begin, size_t* rows_indexes) {
184  size_t task_idx = GetTaskIdx(nid, begin);
185 
186  size_t* left_result = rows_indexes + mem_blocks_[task_idx]->n_offset_left;
187  size_t* right_result = rows_indexes + mem_blocks_[task_idx]->n_offset_right;
188 
189  const size_t* left = mem_blocks_[task_idx]->Left();
190  const size_t* right = mem_blocks_[task_idx]->Right();
191 
192  std::copy_n(left, mem_blocks_[task_idx]->n_left, left_result);
193  std::copy_n(right, mem_blocks_[task_idx]->n_right, right_result);
194  }
195 
196  size_t GetTaskIdx(int nid, size_t begin) {
197  return blocks_offsets_[nid] + begin / BlockSize;
198  }
199 
200  protected:
201  struct BlockInfo{
202  size_t n_left;
203  size_t n_right;
204 
207 
208  size_t* Left() {
209  return &left_data_[0];
210  }
211 
212  size_t* Right() {
213  return &right_data_[0];
214  }
215  private:
216  size_t left_data_[BlockSize];
217  size_t right_data_[BlockSize];
218  };
219  std::vector<std::pair<size_t, size_t>> left_right_nodes_sizes_;
220  std::vector<size_t> blocks_offsets_;
221  std::vector<std::shared_ptr<BlockInfo>> mem_blocks_;
222  size_t max_n_tasks_ = 0;
223 };
224 
225 } // namespace common
226 } // namespace xgboost
227 
228 #endif // XGBOOST_COMMON_PARTITION_BUILDER_H_
xgboost::common::PartitionBuilder::MergeToArray
void MergeToArray(int nid, size_t begin, size_t *rows_indexes)
Definition: partition_builder.h:183
xgboost::common::PartitionBuilder::BlockInfo::Right
size_t * Right()
Definition: partition_builder.h:212
xgboost::common::PartitionBuilder
Definition: partition_builder.h:28
xgboost::common::PartitionBuilder::GetRightBuffer
common::Span< size_t > GetRightBuffer(int nid, size_t begin, size_t end)
Definition: partition_builder.h:141
xgboost::common::PartitionBuilder::max_n_tasks_
size_t max_n_tasks_
Definition: partition_builder.h:222
xgboost::common::Range1d::begin
size_t begin() const
Definition: threading_utils.h:43
xgboost::common::PartitionBuilder::mem_blocks_
std::vector< std::shared_ptr< BlockInfo > > mem_blocks_
Definition: partition_builder.h:221
xgboost::common::PartitionBuilder::Partition
void Partition(const size_t node_in_set, const size_t nid, const common::Range1d range, const int32_t split_cond, const ColumnMatrix &column_matrix, const RegTree &tree, const size_t *rid)
Definition: partition_builder.h:82
xgboost::common::ColumnMatrix
a collection of columns, with support for construction from GHistIndexMatrix.
Definition: column_matrix.h:141
xgboost::common::PartitionBuilder::BlockInfo::n_offset_right
size_t n_offset_right
Definition: partition_builder.h:206
xgboost::common::PartitionBuilder::BlockInfo::n_offset_left
size_t n_offset_left
Definition: partition_builder.h:205
xgboost::common::PartitionBuilder::BlockInfo::n_left
size_t n_left
Definition: partition_builder.h:202
xgboost::common::PartitionBuilder::left_right_nodes_sizes_
std::vector< std::pair< size_t, size_t > > left_right_nodes_sizes_
Definition: partition_builder.h:219
xgboost::common::kDenseColumn
@ kDenseColumn
Definition: column_matrix.h:23
xgboost::common::DenseColumn
Definition: column_matrix.h:111
xgboost::common::SparseColumn
Definition: column_matrix.h:68
xgboost::RegTree
define regression tree to be the most common tree model. This is the data structure used in xgboost's...
Definition: tree_model.h:131
xgboost::common::Span::front
XGBOOST_DEVICE reference front() const
Definition: span.h:525
xgboost::common::PartitionBuilder::GetLeftBuffer
common::Span< size_t > GetLeftBuffer(int nid, size_t begin, size_t end)
Definition: partition_builder.h:136
xgboost::common::PartitionBuilder::BlockInfo
Definition: partition_builder.h:201
xgboost::common::PartitionBuilder::BlockInfo::n_right
size_t n_right
Definition: partition_builder.h:203
xgboost::common::PartitionBuilder::BlockInfo::Left
size_t * Left()
Definition: partition_builder.h:208
xgboost::common::Span< const size_t >
xgboost::common::PartitionBuilder::GetTaskIdx
size_t GetTaskIdx(int nid, size_t begin)
Definition: partition_builder.h:196
xgboost::common::ColumnType
ColumnType
column type
Definition: column_matrix.h:22
data.h
The input data structure of xgboost.
xgboost::common::PartitionBuilder::SetNRightElems
void SetNRightElems(int nid, size_t begin, size_t end, size_t n_right)
Definition: partition_builder.h:151
xgboost::common::PartitionBuilder::AllocateForTask
void AllocateForTask(size_t id)
Definition: partition_builder.h:128
xgboost::common::PartitionBuilder::GetNLeftElems
size_t GetNLeftElems(int nid) const
Definition: partition_builder.h:157
xgboost::common::Range1d
Definition: threading_utils.h:37
xgboost::common::PartitionBuilder::blocks_offsets_
std::vector< size_t > blocks_offsets_
Definition: partition_builder.h:220
xgboost::common::PartitionBuilder::CalculateRowOffsets
void CalculateRowOffsets()
Definition: partition_builder.h:167
xgboost::common::ColumnMatrix::GetColumn
std::unique_ptr< const Column< BinIdxType > > GetColumn(unsigned fid) const
Definition: column_matrix.h:246
tree_model.h
model structure for tree
xgboost::common::Span::data
constexpr XGBOOST_DEVICE pointer data() const __span_noexcept
Definition: span.h:542
xgboost::bst_uint
uint32_t bst_uint
unsigned integer type used for feature index.
Definition: base.h:113
xgboost::common::Range1d::end
size_t end() const
Definition: threading_utils.h:47
xgboost::get
auto get(U &json) -> decltype(detail::GetImpl(*Cast< T >(&json.GetValue())))&
Get Json value.
Definition: json.h:556
xgboost::common::PartitionBuilder::SetNLeftElems
void SetNLeftElems(int nid, size_t begin, size_t end, size_t n_left)
Definition: partition_builder.h:146
xgboost::common::PartitionBuilder::PartitionKernel
std::pair< size_t, size_t > PartitionKernel(const ColumnType &column, common::Span< const size_t > rid_span, const int32_t split_cond, common::Span< size_t > left_part, common::Span< size_t > right_part)
Definition: partition_builder.h:51
xgboost::common::PartitionBuilder::GetNRightElems
size_t GetNRightElems(int nid) const
Definition: partition_builder.h:161
xgboost
namespace of xgboost
Definition: base.h:110
xgboost::common::PartitionBuilder::Init
void Init(const size_t n_tasks, size_t n_nodes, Func funcNTask)
Definition: partition_builder.h:31