Go to the documentation of this file.
7 #ifndef XGBOOST_COMMON_PARTITION_BUILDER_H_
8 #define XGBOOST_COMMON_PARTITION_BUILDER_H_
29 template<
size_t BlockSize>
32 template<
typename Func>
33 void Init(
const size_t n_tasks,
size_t n_nodes, Func funcNTask) {
38 for (
size_t i = 1; i < n_nodes+1; ++i) {
52 template <
bool default_left,
bool any_missing,
typename ColumnType,
typename Predicate>
57 size_t base_rowid, Predicate&& pred) {
58 size_t* p_left_part = left_part.
data();
59 size_t* p_right_part = right_part.
data();
60 size_t nleft_elems = 0;
61 size_t nright_elems = 0;
62 auto state = column.GetInitialState(row_indices.
front() - base_rowid);
64 auto p_row_indices = row_indices.
data();
65 auto n_samples = row_indices.
size();
67 for (
size_t i = 0; i < n_samples; ++i) {
68 auto rid = p_row_indices[i];
69 const int32_t bin_id = column.GetBinIdx(rid - base_rowid, &state);
70 if (any_missing && bin_id == ColumnType::kMissingId) {
72 p_left_part[nleft_elems++] = rid;
74 p_right_part[nright_elems++] = rid;
77 if (pred(rid, bin_id)) {
78 p_left_part[nleft_elems++] = rid;
80 p_right_part[nright_elems++] = rid;
85 return {nleft_elems, nright_elems};
88 template <
typename Pred>
93 size_t* p_left_part = left_part.
data();
94 size_t* p_right_part = right_part.
data();
95 size_t nleft_elems = 0;
96 size_t nright_elems = 0;
97 for (
auto row_id : ridx) {
99 p_left_part[nleft_elems++] = row_id;
101 p_right_part[nright_elems++] = row_id;
104 return {nleft_elems, nright_elems};
107 template <
typename BinIdxType,
bool any_missing,
bool any_cat>
109 const int32_t split_cond, GHistIndexMatrix
const& gmat,
114 const bst_uint fid = tree[nid].SplitIndex();
115 const bool default_left = tree[nid].DefaultLeft();
116 const auto column_ptr = column_matrix.
GetColumn<BinIdxType, any_missing>(fid);
119 auto node_cats = tree.
NodeCats(nid);
121 auto const& index = gmat.index;
122 auto const& cut_values = gmat.cut.Values();
123 auto const& cut_ptrs = gmat.cut.Ptrs();
125 auto pred = [&](
auto ridx,
auto bin_id) {
126 if (any_cat && is_cat) {
127 auto begin = gmat.RowIdx(ridx);
128 auto end = gmat.RowIdx(ridx + 1);
129 auto f_begin = cut_ptrs[fid];
130 auto f_end = cut_ptrs[fid + 1];
136 go_left = default_left;
138 go_left =
Decision(node_cats, cut_values[gidx], default_left);
142 return bin_id <= split_cond;
146 std::pair<size_t, size_t> child_nodes_sizes;
151 child_nodes_sizes = PartitionKernel<true, any_missing>(column, rid_span, left, right,
152 gmat.base_rowid, pred);
154 child_nodes_sizes = PartitionKernel<false, any_missing>(column, rid_span, left, right,
155 gmat.base_rowid, pred);
158 CHECK_EQ(any_missing,
true);
162 child_nodes_sizes = PartitionKernel<true, any_missing>(column, rid_span, left, right,
163 gmat.base_rowid, pred);
165 child_nodes_sizes = PartitionKernel<false, any_missing>(column, rid_span, left, right,
166 gmat.base_rowid, pred);
170 const size_t n_left = child_nodes_sizes.first;
171 const size_t n_right = child_nodes_sizes.second;
191 template <
typename Pred>
195 auto& row_set_collection = *p_row_set_collection;
196 const size_t* p_ridx = row_set_collection[nid].
begin;
202 const size_t n_left = child_nodes_sizes.first;
203 const size_t n_right = child_nodes_sizes.second;
213 CHECK_NE(local_block_ptr, (
BlockInfo*)
nullptr);
219 const size_t task_idx =
GetTaskIdx(nid, begin);
220 return {
mem_blocks_.at(task_idx)->Left(), end - begin };
224 const size_t task_idx =
GetTaskIdx(nid, begin);
225 return {
mem_blocks_.at(task_idx)->Right(), end - begin };
268 size_t* left_result = rows_indexes +
mem_blocks_[task_idx]->n_offset_left;
269 size_t* right_result = rows_indexes +
mem_blocks_[task_idx]->n_offset_right;
271 const size_t* left =
mem_blocks_[task_idx]->Left();
272 const size_t* right =
mem_blocks_[task_idx]->Right();
274 std::copy_n(left,
mem_blocks_[task_idx]->n_left, left_result);
275 std::copy_n(right,
mem_blocks_[task_idx]->n_right, right_result);
291 return &left_data_[0];
295 return &right_data_[0];
298 size_t left_data_[BlockSize];
299 size_t right_data_[BlockSize];
310 #endif // XGBOOST_COMMON_PARTITION_BUILDER_H_
std::pair< size_t, size_t > PartitionKernel(const ColumnType &column, common::Span< const size_t > row_indices, common::Span< size_t > left_part, common::Span< size_t > right_part, size_t base_rowid, Predicate &&pred)
Definition: partition_builder.h:53
void MergeToArray(int nid, size_t begin, size_t *rows_indexes)
Definition: partition_builder.h:265
size_t * Right()
Definition: partition_builder.h:294
Definition: partition_builder.h:30
common::Span< size_t > GetRightBuffer(int nid, size_t begin, size_t end)
Definition: partition_builder.h:223
const std::vector< FeatureType > & GetSplitTypes() const
Get split types for all nodes.
Definition: tree_model.h:605
void PartitionRange(const size_t node_in_set, const size_t nid, common::Range1d range, bst_feature_t fidx, common::RowSetCollection *p_row_set_collection, Pred pred)
Partition tree nodes with specific range of row indices.
Definition: partition_builder.h:192
size_t max_n_tasks_
Definition: partition_builder.h:304
size_t begin() const
Definition: threading_utils.h:41
Utility for fast column-wise access.
std::vector< std::shared_ptr< BlockInfo > > mem_blocks_
Definition: partition_builder.h:303
common::Span< uint32_t const > NodeCats(bst_node_t nidx) const
Get the bit storage for categories.
Definition: tree_model.h:610
a collection of columns, with support for construction from GHistIndexMatrix.
Definition: column_matrix.h:134
@ kDenseColumn
Definition: column_matrix.h:26
size_t n_offset_right
Definition: partition_builder.h:288
collection of rowset
Definition: row_set.h:20
uint32_t bst_feature_t
Type for data column (feature) index.
Definition: base.h:123
size_t n_offset_left
Definition: partition_builder.h:287
std::vector< Elem >::const_iterator begin() const
Definition: row_set.h:47
size_t n_left
Definition: partition_builder.h:284
std::pair< size_t, size_t > PartitionRangeKernel(common::Span< const size_t > ridx, common::Span< size_t > left_part, common::Span< size_t > right_part, Pred pred)
Definition: partition_builder.h:89
int32_t XGBOOST_HOST_DEV_INLINE BinarySearchBin(size_t begin, size_t end, GradientIndex const &data, uint32_t const fidx_begin, uint32_t const fidx_end)
Definition: hist_util.h:299
void Partition(const size_t node_in_set, const size_t nid, const common::Range1d range, const int32_t split_cond, GHistIndexMatrix const &gmat, const ColumnMatrix &column_matrix, const RegTree &tree, const size_t *rid)
Definition: partition_builder.h:108
std::vector< std::pair< size_t, size_t > > left_right_nodes_sizes_
Definition: partition_builder.h:301
Definition: column_matrix.h:107
Definition: column_matrix.h:67
define regression tree to be the most common tree model. This is the data structure used in xgboost's...
Definition: tree_model.h:131
XGBOOST_DEVICE reference front() const
Definition: span.h:531
common::Span< size_t > GetLeftBuffer(int nid, size_t begin, size_t end)
Definition: partition_builder.h:218
Definition: partition_builder.h:283
size_t n_right
Definition: partition_builder.h:285
size_t * Left()
Definition: partition_builder.h:290
size_t GetTaskIdx(int nid, size_t begin)
Definition: partition_builder.h:278
ColumnType
column type
Definition: column_matrix.h:26
The input data structure of xgboost.
void SetNRightElems(int nid, size_t begin, size_t end, size_t n_right)
Definition: partition_builder.h:233
void AllocateForTask(size_t id)
Definition: partition_builder.h:210
size_t GetNLeftElems(int nid) const
Definition: partition_builder.h:239
Definition: threading_utils.h:35
std::vector< size_t > blocks_offsets_
Definition: partition_builder.h:302
void CalculateRowOffsets()
Definition: partition_builder.h:249
constexpr XGBOOST_DEVICE index_type size() const __span_noexcept
Definition: span.h:553
std::unique_ptr< const Column< BinIdxType > > GetColumn(unsigned fid) const
Definition: column_matrix.h:240
constexpr XGBOOST_DEVICE pointer data() const __span_noexcept
Definition: span.h:548
XGBOOST_DEVICE bool Decision(common::Span< uint32_t const > cats, float cat, bool dft_left)
Definition: categorical.h:56
uint32_t bst_uint
unsigned integer type used for feature index.
Definition: base.h:113
size_t end() const
Definition: threading_utils.h:45
auto get(U &json) -> decltype(detail::GetImpl(*Cast< T >(&json.GetValue())))&
Get Json value.
Definition: json.h:583
void SetNLeftElems(int nid, size_t begin, size_t end, size_t n_left)
Definition: partition_builder.h:228
size_t GetNRightElems(int nid) const
Definition: partition_builder.h:243
namespace of xgboost
Definition: base.h:110
void Init(const size_t n_tasks, size_t n_nodes, Func funcNTask)
Definition: partition_builder.h:33