xgboost
transform.h
Go to the documentation of this file.
1 
4 #ifndef XGBOOST_COMMON_TRANSFORM_H_
5 #define XGBOOST_COMMON_TRANSFORM_H_
6 
7 #include <dmlc/omp.h>
8 #include <dmlc/common.h>
9 
10 #include <xgboost/data.h>
11 #include <utility>
12 #include <vector>
13 #include <type_traits> // enable_if
14 
16 #include "xgboost/span.h"
17 
18 #include "common.h"
19 #include "threading_utils.h"
20 
21 #if defined (__CUDACC__)
22 #include "device_helpers.cuh"
23 #endif // defined (__CUDACC__)
24 
25 namespace xgboost {
26 namespace common {
27 
// Number of CUDA threads per block used when launching detail::LaunchCUDAKernel.
constexpr size_t kBlockThreads = 256;

namespace detail {

#if defined(__CUDACC__)
/*!
 * \brief CUDA kernel that applies \p _func to every index in \p _range.
 *
 * Iterates indices via dh::GridStrideRange(*_range.begin(), *_range.end()),
 * i.e. each thread strides over the range, so any grid size covers the whole
 * range. The functor is invoked as _func(i, _spans...).
 *
 * \param _func  Device-callable functor taking (index, spans...).
 * \param _range Half-open index range to process.
 * \param _spans Device spans forwarded unchanged to every invocation.
 */
template <typename Functor, typename... SpanType>
__global__ void LaunchCUDAKernel(Functor _func, Range _range,
                                 SpanType... _spans) {
  for (auto i : dh::GridStrideRange(*_range.begin(), *_range.end())) {
    _func(i, _spans...);
  }
}
#endif  // defined(__CUDACC__)

}  // namespace detail
43 
/*!
 * \brief Do Transformation on HostDeviceVectors.
 *
 * Dispatches a user functor over an index range, either as a CUDA kernel
 * (when a device ordinal >= 0 is supplied and the translation unit is
 * compiled by nvcc) or as a ParallelFor loop on the host.
 *
 * \tparam CompiledWithCuda Defaults to WITH_CUDA(); gates via enable_if which
 *         LaunchCUDA overload exists, so non-CUDA builds get a LOG(FATAL) stub.
 */
template <bool CompiledWithCuda = WITH_CUDA()>
class Transform {
 private:
  /*!
   * \brief Bound (functor, range, device, shard) tuple; Eval() runs the
   *        transformation over a pack of HostDeviceVector pointers.
   */
  template <typename Functor>
  struct Evaluator {
   public:
    /*!
     * \param func   Functor invoked as func(index, spans...).
     * \param range  Index range to iterate.
     * \param device Device ordinal; < 0 selects the CPU path.
     * \param shard  When true, SetDevice(device) is called on every vector
     *               before the CUDA launch.
     */
    Evaluator(Functor func, Range range, int device, bool shard) :
        func_(func), range_{std::move(range)},
        shard_{shard},
        device_{device} {}

    /*!
     * \brief Run the transformation on device (device_ >= 0) or host.
     * \param vectors Pointers to (possibly const) HostDeviceVectors, passed to
     *                the functor as Spans in the same order.
     */
    template <typename... HDV>
    void Eval(HDV... vectors) const {
      bool on_device = device_ >= 0;

      if (on_device) {
        LaunchCUDA(func_, vectors...);
      } else {
        LaunchCPU(func_, vectors...);
      }
    }

   private:
    // CUDA UnpackHDV: obtain a device Span (mutable / const overloads).
    template <typename T>
    Span<T> UnpackHDVOnDevice(HostDeviceVector<T>* _vec) const {
      auto span = _vec->DeviceSpan();
      return span;
    }
    template <typename T>
    Span<T const> UnpackHDVOnDevice(const HostDeviceVector<T>* _vec) const {
      auto span = _vec->ConstDeviceSpan();
      return span;
    }
    // CPU UnpackHDV: wrap the host pointer and size in a Span
    // (mutable / const overloads).
    template <typename T>
    Span<T> UnpackHDV(HostDeviceVector<T>* _vec) const {
      return Span<T> {_vec->HostPointer(),
            static_cast<typename Span<T>::index_type>(_vec->Size())};
    }
    template <typename T>
    Span<T const> UnpackHDV(const HostDeviceVector<T>* _vec) const {
      return Span<T const> {_vec->ConstHostPointer(),
            static_cast<typename Span<T>::index_type>(_vec->Size())};
    }
    // Recursive sync host: touching ConstHostPointer() on each vector forces
    // host-side data to be available before the parallel CPU loop runs.
    template <typename T>
    void SyncHost(const HostDeviceVector<T> *_vector) const {
      _vector->ConstHostPointer();
    }
    template <typename Head, typename... Rest>
    void SyncHost(const HostDeviceVector<Head> *_vector,
                  const HostDeviceVector<Rest> *... _vectors) const {
      _vector->ConstHostPointer();
      SyncHost(_vectors...);
    }
    // Recursive unpack for Shard: assign every vector to the target device.
    template <typename T>
    void UnpackShard(int device, const HostDeviceVector<T> *vector) const {
      vector->SetDevice(device);
    }
    template <typename Head, typename... Rest>
    void UnpackShard(int device,
                     const HostDeviceVector<Head> *_vector,
                     const HostDeviceVector<Rest> *... _vectors) const {
      _vector->SetDevice(device);
      UnpackShard(device, _vectors...);
    }

#if defined(__CUDACC__)
    /*!
     * \brief Device path: place vectors on device_ (if shard_), then launch
     *        detail::LaunchCUDAKernel over [0, range size).
     *
     * Only participates in overload resolution when CompiledWithCuda is true.
     */
    template <typename std::enable_if<CompiledWithCuda>::type* = nullptr,
              typename... HDV>
    void LaunchCUDA(Functor _func, HDV*... _vectors) const {
      if (shard_) {
        UnpackShard(device_, _vectors...);
      }

      size_t range_size = *range_.end() - *range_.begin();

      // Extract index to deal with possible old OpenMP.
      // This deals with situation like multi-class setting where
      // granularity is used in data vector.
      size_t shard_size = range_size;
      // Kernel iterates the rebased range [0, shard_size).
      Range shard_range {0, static_cast<Range::DifferenceType>(shard_size)};
      dh::safe_cuda(cudaSetDevice(device_));
      // NOTE(review): grid count is derived from *range_.end() rather than
      // shard_size; the two differ only when *range_.begin() != 0, in which
      // case the grid is over-provisioned (harmless with a grid-stride loop).
      const int kGrids =
          static_cast<int>(DivRoundUp(*(range_.end()), kBlockThreads));
      if (kGrids == 0) {
        return;  // Empty range: nothing to launch.
      }
      detail::LaunchCUDAKernel<<<kGrids, kBlockThreads>>>(  // NOLINT
          _func, shard_range, UnpackHDVOnDevice(_vectors)...);
    }
#else
    /*!
     * \brief Stub for builds without CUDA: reaching it at runtime is a
     *        configuration error, reported via LOG(FATAL).
     */
    template <typename std::enable_if<!CompiledWithCuda>::type* = nullptr,
              typename... HDV>
    void LaunchCUDA(Functor _func, HDV*...) const {
      // Remove unused parameter compiler warning.
      (void) _func;

      LOG(FATAL) << "Not part of device code. WITH_CUDA: " << WITH_CUDA();
    }
#endif  // defined(__CUDACC__)

    /*!
     * \brief Host path: sync all vectors to host, then run func over
     *        [0, *range_.end()) in parallel via ParallelFor.
     */
    template <typename... HDV>
    void LaunchCPU(Functor func, HDV*... vectors) const {
      omp_ulong end = static_cast<omp_ulong>(*(range_.end()));
      SyncHost(vectors...);
      ParallelFor(end, [&](omp_ulong idx) {
          func(idx, UnpackHDV(vectors)...);
        });
    }

   private:
    /*! \brief Functor applied to every index. */
    Functor func_;
    /*! \brief Index range to iterate. */
    Range range_;
    /*! \brief Whether vectors are assigned to device_ before launching. */
    bool shard_;
    /*! \brief Device ordinal; < 0 means CPU. */
    int device_;
  };

 public:
  /*!
   * \brief Initialize a Transform object.
   *
   * \param func   Functor invoked as func(index, spans...). Must be marked
   *               XGBOOST_DEVICE for the CUDA path.
   * \param range  Index range to iterate.
   * \param device Device ordinal; < 0 selects the CPU path.
   * \param shard  Whether vectors should be moved to \p device first.
   * \return An Evaluator bound to the given functor and configuration.
   *
   * NOTE(review): \p range is declared const, so std::move(range) below
   * degrades to a copy — confirm before relying on move semantics here.
   */
  template <typename Functor>
  static Evaluator<Functor> Init(Functor func, Range const range,
                                 int device,
                                 bool const shard = true) {
    return Evaluator<Functor> {func, std::move(range), device, shard};
  }
};
208 
209 } // namespace common
210 } // namespace xgboost
211 
212 #endif // XGBOOST_COMMON_TRANSFORM_H_
xgboost::common::Range::begin
XGBOOST_DEVICE Iterator begin() const
Definition: common.h:130
xgboost::common::Span::index_type
std::size_t index_type
Definition: span.h:421
xgboost::common::Transform::Init
static Evaluator< Functor > Init(Functor func, Range const range, int device, bool const shard=true)
Initialize a Transform object.
Definition: transform.h:202
xgboost::HostDeviceVector
Definition: host_device_vector.h:86
host_device_vector.h
A device-and-host vector abstraction layer.
xgboost::HostDeviceVector::DeviceSpan
common::Span< T > DeviceSpan()
xgboost::common::Transform
Do Transformation on HostDeviceVectors.
Definition: transform.h:59
WITH_CUDA
#define WITH_CUDA()
Definition: common.h:31
xgboost::omp_ulong
dmlc::omp_ulong omp_ulong
define unsigned long for openmp loop
Definition: base.h:268
xgboost::HostDeviceVector::SetDevice
void SetDevice(int device) const
xgboost::common::Range::end
XGBOOST_DEVICE Iterator end() const
Definition: common.h:131
span.h
xgboost::common::Range::DifferenceType
int64_t DifferenceType
Definition: common.h:94
xgboost::common::kBlockThreads
constexpr size_t kBlockThreads
Definition: transform.h:28
xgboost::common::Range
Definition: common.h:92
xgboost::common::DivRoundUp
XGBOOST_DEVICE T1 DivRoundUp(const T1 a, const T2 b)
Definition: common.h:85
xgboost::common::ParallelFor
void ParallelFor(Index size, size_t nthreads, Func fn)
Definition: threading_utils.h:137
xgboost::HostDeviceVector::Size
size_t Size() const
xgboost::HostDeviceVector::HostPointer
T * HostPointer()
Definition: host_device_vector.h:111
common.h
Common utilities.
xgboost::common::Span
span class implementation, based on ISO++20 span<T>. The interface should be the same.
Definition: span.h:142
data.h
The input data structure of xgboost.
threading_utils.h
xgboost::HostDeviceVector::ConstHostPointer
const T * ConstHostPointer() const
Definition: host_device_vector.h:115
xgboost::HostDeviceVector::ConstDeviceSpan
common::Span< const T > ConstDeviceSpan() const
xgboost
namespace of xgboost
Definition: base.h:110