Go to the documentation of this file.
4 #ifndef XGBOOST_COMMON_THREADING_UTILS_H_
5 #define XGBOOST_COMMON_THREADING_UTILS_H_
7 #include <dmlc/common.h>
12 #include <type_traits>
15 #include "xgboost/logging.h"
21 #endif // !defined(_OPENMP)
24 #if defined(_OPENMP) && defined(_MSC_VER)
28 #endif // defined(_MSC_VER)
86 template<
typename Func>
88 for (
size_t i = 0; i < dim1; ++i) {
89 const size_t size = getter_size_dim2(i);
90 const size_t n_blocks = size/grain_size + !!(size % grain_size);
91 for (
size_t iblock = 0; iblock < n_blocks; ++iblock) {
92 const size_t begin = iblock * grain_size;
93 const size_t end = std::min(begin + grain_size, size);
94 AddBlock(i, begin, end);
101 return ranges_.size();
106 CHECK_LT(i, first_dimension_.size());
107 return first_dimension_[i];
112 CHECK_LT(i, ranges_.size());
117 void AddBlock(
size_t first_dimension,
size_t begin,
size_t end) {
118 first_dimension_.push_back(first_dimension);
119 ranges_.emplace_back(begin, end);
122 std::vector<Range1d> ranges_;
123 std::vector<size_t> first_dimension_;
128 template <
typename Func>
130 const size_t num_blocks_in_space = space.
Size();
131 CHECK_GE(nthreads, 1);
133 dmlc::OMPException exc;
134 #pragma omp parallel num_threads(nthreads)
137 size_t tid = omp_get_thread_num();
139 num_blocks_in_space / nthreads + !!(num_blocks_in_space % nthreads);
141 size_t begin = chunck_size * tid;
142 size_t end = std::min(begin + chunck_size, num_blocks_in_space);
143 for (
auto i = begin; i < end; i++) {
169 template <
typename Index,
typename Func>
171 #if defined(_MSC_VER)
173 using OmpInd = std::conditional_t<std::is_signed<Index>::value,
Index,
omp_ulong>;
175 using OmpInd =
Index;
177 OmpInd length =
static_cast<OmpInd
>(size);
178 CHECK_GE(n_threads, 1);
180 dmlc::OMPException exc;
181 switch (sched.
sched) {
183 #pragma omp parallel for num_threads(n_threads)
184 for (OmpInd i = 0; i < length; ++i) {
190 if (sched.
chunk == 0) {
191 #pragma omp parallel for num_threads(n_threads) schedule(dynamic)
192 for (OmpInd i = 0; i < length; ++i) {
196 #pragma omp parallel for num_threads(n_threads) schedule(dynamic, sched.chunk)
197 for (OmpInd i = 0; i < length; ++i) {
204 if (sched.
chunk == 0) {
205 #pragma omp parallel for num_threads(n_threads) schedule(static)
206 for (OmpInd i = 0; i < length; ++i) {
210 #pragma omp parallel for num_threads(n_threads) schedule(static, sched.chunk)
211 for (OmpInd i = 0; i < length; ++i) {
218 #pragma omp parallel for num_threads(n_threads) schedule(guided)
219 for (OmpInd i = 0; i < length; ++i) {
228 template <
typename Index,
typename Func>
235 CHECK_GE(limit, 1) <<
"Invalid thread limit for OpenMP.";
242 if (n_threads <= 0) {
243 n_threads = std::min(omp_get_num_procs(), omp_get_max_threads());
246 n_threads = std::max(n_threads, 1);
256 template <
typename T,
size_t MaxStackSize>
260 if (MaxStackSize >= required_size_) {
263 ptr_ =
reinterpret_cast<T*
>(malloc(required_size_ *
sizeof(T)));
266 throw std::bad_alloc{};
270 std::fill_n(ptr_, required_size_, init);
274 if (required_size_ > MaxStackSize) {
283 size_t required_size_;
284 T stack_mem_[MaxStackSize];
289 #endif // XGBOOST_COMMON_THREADING_UTILS_H_
@ kAuto
Definition: threading_utils.h:156
size_t Size() const
Definition: threading_utils.h:100
@ kGuided
Definition: threading_utils.h:159
~MemStackAllocator()
Definition: threading_utils.h:273
Optionally compressed gradient index. The compression works only with dense data.
Definition: hist_util.h:207
int32_t OmpGetNumThreads(int32_t n_threads)
Definition: threading_utils.h:241
size_t GetFirstDimension(size_t i) const
Definition: threading_utils.h:105
size_t begin() const
Definition: threading_utils.h:41
void ParallelFor(Index size, int32_t n_threads, Sched sched, Func fn)
Definition: threading_utils.h:170
static Sched Auto()
Definition: threading_utils.h:163
dmlc::omp_ulong omp_ulong
Defines the unsigned long type used as the index of OpenMP loops.
Definition: base.h:271
T const & operator[](size_t i) const
Definition: threading_utils.h:279
Definition: threading_utils.h:70
int32_t omp_get_thread_limit() __GOMP_NOTHROW
Definition: threading_utils.h:19
T & operator[](size_t i)
Definition: threading_utils.h:278
BlockedSpace2d(size_t dim1, Func getter_size_dim2, size_t grain_size)
Definition: threading_utils.h:87
Range1d GetRange(size_t i) const
Definition: threading_utils.h:111
static Sched Static(size_t n=0)
Definition: threading_utils.h:165
static Sched Guided()
Definition: threading_utils.h:166
int32_t GetCfsCPUCount() noexcept
MemStackAllocator(size_t required_size)
Definition: threading_utils.h:259
MemStackAllocator(size_t required_size, T init)
Definition: threading_utils.h:269
enum xgboost::common::Sched::@0 sched
Definition: threading_utils.h:35
int32_t OmpGetThreadLimit()
Definition: threading_utils.h:233
Range1d(size_t begin, size_t end)
Definition: threading_utils.h:37
@ kStatic
Definition: threading_utils.h:158
A C-style array with in-stack allocation. As long as the array is no larger than MaxStackSize, it is stored in the built-in fixed-size buffer; otherwise the storage is obtained with malloc.
Definition: threading_utils.h:257
Definition: threading_utils.h:154
void ParallelFor2d(const BlockedSpace2d &space, int nthreads, Func func)
Definition: threading_utils.h:129
@ kDynamic
Definition: threading_utils.h:157
size_t end() const
Definition: threading_utils.h:45
static Sched Dyn(size_t n=0)
Definition: threading_utils.h:164
size_t chunk
Definition: threading_utils.h:161
namespace of xgboost
Definition: base.h:110