xgboost
transform.h
Go to the documentation of this file.
1 
4 #ifndef XGBOOST_COMMON_TRANSFORM_H_
5 #define XGBOOST_COMMON_TRANSFORM_H_
6 
7 #include <dmlc/common.h>
8 #include <dmlc/omp.h>
9 #include <xgboost/data.h>
10 
11 #include <type_traits> // enable_if
12 #include <utility>
13 #include <vector>
14 
15 #include "common.h"
16 #include "threading_utils.h"
18 #include "xgboost/span.h"
19 
20 #if defined (__CUDACC__)
21 #include "device_helpers.cuh"
22 #endif // defined (__CUDACC__)
23 
24 namespace xgboost {
25 namespace common {
26 
// Threads per CUDA block used for detail::LaunchCUDAKernel launches.
constexpr size_t kBlockThreads = 256;
28 
namespace detail {

#if defined(__CUDACC__)
/*
 * CUDA kernel applying `_func` to every index of `_range`.
 *
 * Each thread walks the indices produced by dh::GridStrideRange over
 * [*_range.begin(), *_range.end()), invoking the functor with the index
 * and the unpacked spans.  Spans are passed by value; Span is a
 * non-owning view, so copying it into the kernel is cheap.
 */
template <typename Functor, typename... SpanType>
__global__ void LaunchCUDAKernel(Functor _func, Range _range,
                                 SpanType... _spans) {
  for (auto i : dh::GridStrideRange(*_range.begin(), *_range.end())) {
    _func(i, _spans...);
  }
}
#endif  // defined(__CUDACC__)

}  // namespace detail
42 
57 template <bool CompiledWithCuda = WITH_CUDA()>
58 class Transform {
59  private:
60  template <typename Functor>
61  struct Evaluator {
62  public:
63  Evaluator(Functor func, Range range, int32_t n_threads, int32_t device_idx)
64  : func_(func), range_{std::move(range)}, n_threads_{n_threads}, device_{device_idx} {}
65 
72  template <typename... HDV>
73  void Eval(HDV... vectors) const {
74  bool on_device = device_ >= 0;
75 
76  if (on_device) {
77  LaunchCUDA(func_, vectors...);
78  } else {
79  LaunchCPU(func_, vectors...);
80  }
81  }
82 
83  private:
84  // CUDA UnpackHDV
85  template <typename T>
86  Span<T> UnpackHDVOnDevice(HostDeviceVector<T>* _vec) const {
87  auto span = _vec->DeviceSpan();
88  return span;
89  }
90  template <typename T>
91  Span<T const> UnpackHDVOnDevice(const HostDeviceVector<T>* _vec) const {
92  auto span = _vec->ConstDeviceSpan();
93  return span;
94  }
95  // CPU UnpackHDV
96  template <typename T>
97  Span<T> UnpackHDV(HostDeviceVector<T>* _vec) const {
98  return Span<T> {_vec->HostPointer(),
99  static_cast<typename Span<T>::index_type>(_vec->Size())};
100  }
101  template <typename T>
102  Span<T const> UnpackHDV(const HostDeviceVector<T>* _vec) const {
103  return Span<T const> {_vec->ConstHostPointer(),
104  static_cast<typename Span<T>::index_type>(_vec->Size())};
105  }
106  // Recursive sync host
107  template <typename T>
108  void SyncHost(const HostDeviceVector<T> *_vector) const {
109  _vector->ConstHostPointer();
110  }
111  template <typename Head, typename... Rest>
112  void SyncHost(const HostDeviceVector<Head> *_vector,
113  const HostDeviceVector<Rest> *... _vectors) const {
114  _vector->ConstHostPointer();
115  SyncHost(_vectors...);
116  }
117  // Recursive unpack for Shard.
118  template <typename T>
119  void UnpackShard(int device, const HostDeviceVector<T> *vector) const {
120  vector->SetDevice(device);
121  }
122  template <typename Head, typename... Rest>
123  void UnpackShard(int device,
124  const HostDeviceVector<Head> *_vector,
125  const HostDeviceVector<Rest> *... _vectors) const {
126  _vector->SetDevice(device);
127  UnpackShard(device, _vectors...);
128  }
129 
130 #if defined(__CUDACC__)
131  template <typename std::enable_if<CompiledWithCuda>::type* = nullptr,
132  typename... HDV>
133  void LaunchCUDA(Functor _func, HDV*... _vectors) const {
134  UnpackShard(device_, _vectors...);
135 
136  size_t range_size = *range_.end() - *range_.begin();
137 
138  // Extract index to deal with possible old OpenMP.
139  // This deals with situation like multi-class setting where
140  // granularity is used in data vector.
141  size_t shard_size = range_size;
142  Range shard_range {0, static_cast<Range::DifferenceType>(shard_size)};
143  dh::safe_cuda(cudaSetDevice(device_));
144  const int kGrids =
145  static_cast<int>(DivRoundUp(*(range_.end()), kBlockThreads));
146  if (kGrids == 0) {
147  return;
148  }
149  detail::LaunchCUDAKernel<<<kGrids, kBlockThreads>>>( // NOLINT
150  _func, shard_range, UnpackHDVOnDevice(_vectors)...);
151  }
152 #else
153 
154  template <typename std::enable_if<!CompiledWithCuda>::type* = nullptr,
155  typename... HDV>
156  void LaunchCUDA(Functor _func, HDV*...) const {
157  // Remove unused parameter compiler warning.
158  (void) _func;
159 
160  LOG(FATAL) << "Not part of device code. WITH_CUDA: " << WITH_CUDA();
161  }
162 #endif // defined(__CUDACC__)
163 
164  template <typename... HDV>
165  void LaunchCPU(Functor func, HDV *...vectors) const {
166  omp_ulong end = static_cast<omp_ulong>(*(range_.end()));
167  SyncHost(vectors...);
168  ParallelFor(end, n_threads_, [&](omp_ulong idx) { func(idx, UnpackHDV(vectors)...); });
169  }
170 
171  private:
173  Functor func_;
175  Range range_;
176  int32_t n_threads_;
177  int32_t device_;
178  };
179 
180  public:
193  template <typename Functor>
194  static Evaluator<Functor> Init(Functor func, Range const range, int32_t n_threads,
195  int32_t device_idx) {
196  return Evaluator<Functor>{func, std::move(range), n_threads, device_idx};
197  }
198 };
199 
200 } // namespace common
201 } // namespace xgboost
202 
203 #endif // XGBOOST_COMMON_TRANSFORM_H_
xgboost::common::Range::begin
XGBOOST_DEVICE Iterator begin() const
Definition: common.h:144
xgboost::HostDeviceVector
Definition: host_device_vector.h:86
host_device_vector.h
A device-and-host vector abstraction layer.
xgboost::HostDeviceVector::DeviceSpan
common::Span< T > DeviceSpan()
xgboost::common::Transform
Performs a transformation on HostDeviceVectors.
Definition: transform.h:58
xgboost::common::ParallelFor
void ParallelFor(Index size, int32_t n_threads, Sched sched, Func fn)
Definition: threading_utils.h:170
WITH_CUDA
#define WITH_CUDA()
Definition: common.h:32
xgboost::omp_ulong
dmlc::omp_ulong omp_ulong
defines an unsigned long type for OpenMP loops
Definition: base.h:271
xgboost::HostDeviceVector::SetDevice
void SetDevice(int device) const
xgboost::common::Range::end
XGBOOST_DEVICE Iterator end() const
Definition: common.h:145
span.h
xgboost::common::Range::DifferenceType
int64_t DifferenceType
Definition: common.h:108
xgboost::common::kBlockThreads
constexpr size_t kBlockThreads
Definition: transform.h:27
xgboost::common::Transform::Init
static Evaluator< Functor > Init(Functor func, Range const range, int32_t n_threads, int32_t device_idx)
Initialize a Transform object.
Definition: transform.h:194
xgboost::common::Range
Definition: common.h:106
xgboost::common::DivRoundUp
XGBOOST_DEVICE T1 DivRoundUp(const T1 a, const T2 b)
Definition: common.h:86
xgboost::HostDeviceVector::Size
size_t Size() const
xgboost::HostDeviceVector::HostPointer
T * HostPointer()
Definition: host_device_vector.h:111
common.h
Common utilities.
xgboost::common::Span< T >
data.h
The input data structure of xgboost.
threading_utils.h
xgboost::HostDeviceVector::ConstHostPointer
const T * ConstHostPointer() const
Definition: host_device_vector.h:115
xgboost::HostDeviceVector::ConstDeviceSpan
common::Span< const T > ConstDeviceSpan() const
xgboost
namespace of xgboost
Definition: base.h:110