xgboost
threading_utils.h
Go to the documentation of this file.
1 
6 #ifndef XGBOOST_COMMON_THREADING_UTILS_H_
7 #define XGBOOST_COMMON_THREADING_UTILS_H_
8 
9 #include <vector>
10 #include <algorithm>
11 
12 #include "xgboost/logging.h"
13 
14 namespace xgboost {
15 namespace common {
16 
17 // Represent simple range of indexes [begin, end)
18 // Inspired by tbb::blocked_range
19 class Range1d {
20  public:
21  Range1d(size_t begin, size_t end): begin_(begin), end_(end) {
22  CHECK_LT(begin, end);
23  }
24 
25  size_t begin() const { // NOLINT
26  return begin_;
27  }
28 
29  size_t end() const { // NOLINT
30  return end_;
31  }
32 
33  private:
34  size_t begin_;
35  size_t end_;
36 };
37 
38 
39 // Split 2d space to balanced blocks
40 // Implementation of the class is inspired by tbb::blocked_range2d
41 // However, TBB provides only (n x m) 2d range (matrix) separated by blocks. Example:
42 // [ 1,2,3 ]
43 // [ 4,5,6 ]
44 // [ 7,8,9 ]
45 // But the class is able to work with different sizes in each 'row'. Example:
46 // [ 1,2 ]
47 // [ 3,4,5,6 ]
48 // [ 7,8,9]
49 // If grain_size is 2: It produces following blocks:
50 // [1,2], [3,4], [5,6], [7,8], [9]
51 // The class helps to process data in several tree nodes (non-balanced usually) in parallel
52 // Using nested parallelism (by nodes and by data in each node)
53 // it helps to improve CPU resources utilization
55  public:
56  // Example of space:
57  // [ 1,2 ]
58  // [ 3,4,5,6 ]
59  // [ 7,8,9]
60  // BlockedSpace2d will create following blocks (tasks) if grain_size=2:
61  // 1-block: first_dimension = 0, range of indexes in a 'row' = [0,2) (includes [1,2] values)
62  // 2-block: first_dimension = 1, range of indexes in a 'row' = [0,2) (includes [3,4] values)
63  // 3-block: first_dimension = 1, range of indexes in a 'row' = [2,4) (includes [5,6] values)
64  // 4-block: first_dimension = 2, range of indexes in a 'row' = [0,2) (includes [7,8] values)
65  // 5-block: first_dimension = 2, range of indexes in a 'row' = [2,3) (includes [9] values)
66  // Arguments:
67  // dim1 - size of the first dimension in the space
68  // getter_size_dim2 - functor to get the second dimensions for each 'row' by row-index
69  // grain_size - max size of produced blocks
70  template<typename Func>
71  BlockedSpace2d(size_t dim1, Func getter_size_dim2, size_t grain_size) {
72  for (size_t i = 0; i < dim1; ++i) {
73  const size_t size = getter_size_dim2(i);
74  const size_t n_blocks = size/grain_size + !!(size % grain_size);
75  for (size_t iblock = 0; iblock < n_blocks; ++iblock) {
76  const size_t begin = iblock * grain_size;
77  const size_t end = std::min(begin + grain_size, size);
78  AddBlock(i, begin, end);
79  }
80  }
81  }
82 
83  // Amount of blocks(tasks) in a space
84  size_t Size() const {
85  return ranges_.size();
86  }
87 
88  // get index of the first dimension of i-th block(task)
89  size_t GetFirstDimension(size_t i) const {
90  CHECK_LT(i, first_dimension_.size());
91  return first_dimension_[i];
92  }
93 
94  // get a range of indexes for the second dimension of i-th block(task)
95  Range1d GetRange(size_t i) const {
96  CHECK_LT(i, ranges_.size());
97  return ranges_[i];
98  }
99 
100  private:
101  void AddBlock(size_t first_dimension, size_t begin, size_t end) {
102  first_dimension_.push_back(first_dimension);
103  ranges_.emplace_back(begin, end);
104  }
105 
106  std::vector<Range1d> ranges_;
107  std::vector<size_t> first_dimension_;
108 };
109 
110 
111 // Wrapper to implement nested parallelism with simple omp parallel for
112 template<typename Func>
113 void ParallelFor2d(const BlockedSpace2d& space, int nthreads, Func func) {
114  const size_t num_blocks_in_space = space.Size();
115  nthreads = std::min(nthreads, omp_get_max_threads());
116  nthreads = std::max(nthreads, 1);
117 
118 #pragma omp parallel num_threads(nthreads)
119  {
120  size_t tid = omp_get_thread_num();
121  size_t chunck_size = num_blocks_in_space / nthreads + !!(num_blocks_in_space % nthreads);
122 
123  size_t begin = chunck_size * tid;
124  size_t end = std::min(begin + chunck_size, num_blocks_in_space);
125  for (auto i = begin; i < end; i++) {
126  func(space.GetFirstDimension(i), space.GetRange(i));
127  }
128  }
129 }
130 
131 } // namespace common
132 } // namespace xgboost
133 
134 #endif // XGBOOST_COMMON_THREADING_UTILS_H_
BlockedSpace2d(size_t dim1, Func getter_size_dim2, size_t grain_size)
Definition: threading_utils.h:71
size_t begin() const
Definition: threading_utils.h:25
size_t GetFirstDimension(size_t i) const
Definition: threading_utils.h:89
size_t Size() const
Definition: threading_utils.h:84
Definition: threading_utils.h:19
namespace of xgboost
Definition: base.h:102
Definition: threading_utils.h:54
void ParallelFor2d(const BlockedSpace2d &space, int nthreads, Func func)
Definition: threading_utils.h:113
Range1d GetRange(size_t i) const
Definition: threading_utils.h:95
size_t end() const
Definition: threading_utils.h:29
Range1d(size_t begin, size_t end)
Definition: threading_utils.h:21