6 #ifndef XGBOOST_COMMON_THREADING_UTILS_H_
7 #define XGBOOST_COMMON_THREADING_UTILS_H_
9 #include <dmlc/common.h>
14 #include <type_traits>
17 #include "xgboost/logging.h"
23 #endif // !defined(_OPENMP)
26 #if defined(_OPENMP) && defined(_MSC_VER)
30 #endif // defined(_OPENMP) && defined(_MSC_VER)
// Fragment of a function template; its name and parameter list are outside this
// excerpt. For every index i in [0, dim1) it asks getter_size_dim2(i) for the
// extent of the second dimension, cuts that extent into pieces of at most
// grain_size elements, and registers each piece through AddBlock.
88 template<
typename Func>
90 for (
size_t i = 0; i < dim1; ++i) {
91 const size_t size = getter_size_dim2(i);
// Ceiling division: one extra block when size is not a multiple of grain_size.
// NOTE(review): grain_size == 0 would divide by zero here; no guard is visible
// in this excerpt -- confirm that callers never pass 0.
92 const size_t n_blocks = size/grain_size + !!(size % grain_size);
93 for (
size_t iblock = 0; iblock < n_blocks; ++iblock) {
94 const size_t begin = iblock * grain_size;
// The last block is clamped so that it never reaches past size.
95 const size_t end = std::min(begin + grain_size, size);
96 AddBlock(i, begin, end);
// Body of an accessor whose signature is outside this excerpt: reports the
// number of entries currently stored in ranges_.
103 return ranges_.size();
// Body of an accessor whose signature is outside this excerpt: checks that i is
// a valid position in first_dimension_ and returns the value stored there.
108 CHECK_LT(i, first_dimension_.size());
109 return first_dimension_[i];
// Checks that i is a valid position in ranges_. The statement that uses i
// afterwards is outside this excerpt.
114 CHECK_LT(i, ranges_.size());
// Records one block by appending to two vectors in the same call, so entry k of
// first_dimension_ and entry k of ranges_ describe the same block.
//   first_dimension: index of the block along the first dimension.
//   begin, end:      bounds forwarded to the Range1d constructor.
// NOTE(review): end looks exclusive, judging by how the visible caller clamps
// it to the dimension size -- confirm against the Range1d definition.
119 void AddBlock(
size_t first_dimension,
size_t begin,
size_t end) {
120 first_dimension_.push_back(first_dimension);
// Constructs the Range1d in place from (begin, end).
121 ranges_.emplace_back(begin, end);
// One Range1d per recorded block, in insertion order.
124 std::vector<Range1d> ranges_;
// First-dimension index of each recorded block. AddBlock appends to this vector
// and to ranges_ together, so equal positions refer to the same block.
125 std::vector<size_t> first_dimension_;
// Fragment of a function template; its name and signature are outside this
// excerpt. It distributes the block indices [0, space.Size()) over the threads
// of an OpenMP parallel region, one contiguous slice per thread.
130 template <
typename Func>
132 const size_t num_blocks_in_space = space.
Size();
// Use at most omp_get_max_threads() threads and never fewer than one.
133 nthreads = std::min(nthreads, omp_get_max_threads());
134 nthreads = std::max(nthreads, 1);
// NOTE(review): presumably used to carry exceptions out of the parallel region;
// the calls made on exc are outside this excerpt -- confirm.
136 dmlc::OMPException exc;
137 #pragma omp parallel num_threads(nthreads)
140 size_t tid = omp_get_thread_num();
142 num_blocks_in_space / nthreads + !!(num_blocks_in_space % nthreads);
// The expression above is the block count divided by nthreads, rounded up.
// chunck_size is declared outside this excerpt; presumably it holds that
// value -- confirm.
// Thread tid handles the slice [begin, end). For trailing threads begin can be
// larger than the block count; end is then smaller than begin and the loop
// below does not execute.
144 size_t begin = chunck_size * tid;
145 size_t end = std::min(begin + chunck_size, num_blocks_in_space);
146 for (
auto i = begin; i < end; i++) {
// Fragment of a function template; its name and parameters are outside this
// excerpt. It runs an index loop over [0, length) with n_threads OpenMP threads
// and picks the schedule clause from sched.sched and sched.chunk. The case
// labels of the switch and the loop bodies are not visible here.
172 template <
typename Index,
typename Func>
174 #if defined(_MSC_VER)
// With MSVC, a signed Index is used as the loop index type unchanged; any other
// Index is replaced by omp_ulong.
// NOTE(review): presumably a workaround for loop index types accepted by MSVC's
// OpenMP implementation -- confirm.
176 using OmpInd = std::conditional_t<std::is_signed<Index>::value,
Index,
omp_ulong>;
// Alternative definition: the loop index type is Index itself. The preprocessor
// directives separating the two definitions are outside this excerpt.
178 using OmpInd =
Index;
// Convert the iteration count to the loop index type once, before the loops.
180 OmpInd length =
static_cast<OmpInd
>(size);
// NOTE(review): presumably used to carry exceptions out of the parallel loops;
// the calls made on exc are outside this excerpt -- confirm.
182 dmlc::OMPException exc;
183 switch (sched.
sched) {
// No schedule clause: the OpenMP implementation's default schedule is used.
185 #pragma omp parallel for num_threads(n_threads)
186 for (OmpInd i = 0; i < length; ++i) {
// Dynamic schedule. A chunk of 0 selects the form without an explicit chunk
// size; otherwise sched.chunk is passed to OpenMP.
192 if (sched.
chunk == 0) {
193 #pragma omp parallel for num_threads(n_threads) schedule(dynamic)
194 for (OmpInd i = 0; i < length; ++i) {
198 #pragma omp parallel for num_threads(n_threads) schedule(dynamic, sched.chunk)
199 for (OmpInd i = 0; i < length; ++i) {
// Static schedule, with the same treatment of a zero chunk.
206 if (sched.
chunk == 0) {
207 #pragma omp parallel for num_threads(n_threads) schedule(static)
208 for (OmpInd i = 0; i < length; ++i) {
212 #pragma omp parallel for num_threads(n_threads) schedule(static, sched.chunk)
213 for (OmpInd i = 0; i < length; ++i) {
// Guided schedule; sched.chunk is not consulted on this path.
220 #pragma omp parallel for num_threads(n_threads) schedule(guided)
221 for (OmpInd i = 0; i < length; ++i) {
230 template <
typename Index,
typename Func>
237 template <
typename Index,
typename Func>
// Requires limit >= 1 and attaches the message below to a failed check. How
// limit is obtained is outside this excerpt.
245 CHECK_GE(limit, 1) <<
"Invalid thread limit for OpenMP.";
// Fragment of a function whose signature is outside this excerpt. threads is a
// reference to *p_threads, so every assignment to it is visible to the caller.
257 auto& threads = *p_threads;
// Capture the maximum thread count before it is changed; this is the return value.
258 int32_t nthread_original = omp_get_max_threads();
// NOTE(review): presumably executed only under a condition on a line that is
// not shown (e.g. a non-positive request) -- confirm. Replaces the requested
// count by the number of processors reported by OpenMP.
260 threads = omp_get_num_procs();
// Apply the chosen count and return the previous maximum.
263 omp_set_num_threads(threads);
264 return nthread_original;
// Fragment of a function whose signature is outside this excerpt. threads is a
// reference to *p_threads, so every assignment to it is visible to the caller.
268 auto& threads = *p_threads;
// Capture the maximum thread count before it is changed; this is the return value.
269 int32_t nthread_original = omp_get_max_threads();
// NOTE(review): presumably executed only under a condition on a line that is
// not shown -- confirm. Keeps the thread count that was already in effect.
271 threads = nthread_original;
// Apply the chosen count and return the previous maximum.
274 omp_set_num_threads(threads);
275 return nthread_original;
// A non-positive n_threads is replaced by the number of processors reported by
// OpenMP. What happens to n_threads afterwards is outside this excerpt.
279 if (n_threads <= 0) {
280 n_threads = omp_get_num_procs();
288 #endif // XGBOOST_COMMON_THREADING_UTILS_H_