xgboost
transform.h
#ifndef XGBOOST_COMMON_TRANSFORM_H_
#define XGBOOST_COMMON_TRANSFORM_H_

#include <dmlc/omp.h>
#include <dmlc/common.h>

#include <xgboost/data.h>
#include <utility>
#include <vector>
#include <type_traits>  // enable_if

#include "xgboost/span.h"

#include "common.h"

#if defined(__CUDACC__)
#include "device_helpers.cuh"
#endif  // defined(__CUDACC__)

namespace xgboost {
namespace common {

constexpr size_t kBlockThreads = 256;

namespace detail {

#if defined(__CUDACC__)
template <typename Functor, typename... SpanType>
__global__ void LaunchCUDAKernel(Functor _func, Range _range,
                                 SpanType... _spans) {
  for (auto i : dh::GridStrideRange(*_range.begin(), *_range.end())) {
    _func(i, _spans...);
  }
}
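// Note: dh::GridStrideRange implements a grid-stride loop, so a fixed-size
// grid can cover a range of any length; each thread handles indices
// i, i + gridDim.x * blockDim.x, i + 2 * gridDim.x * blockDim.x, and so on.
// A minimal hand-written equivalent (sketch, not part of this file):
//
//   for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n;
//        i += gridDim.x * blockDim.x) {
//     _func(i, _spans...);
//   }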
#endif  // defined(__CUDACC__)

}  // namespace detail

/*! \brief Do Transformation on HostDeviceVectors. */
template <bool CompiledWithCuda = WITH_CUDA()>
class Transform {
 private:
  template <typename Functor>
  struct Evaluator {
   public:
    Evaluator(Functor func, Range range, int device, bool shard)
        : func_(func), range_{std::move(range)},
          shard_{shard}, device_{device} {}

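    // Apply the functor to every index in range_. Dispatches to the CUDA
    // launcher when a device ordinal (>= 0) was given, otherwise runs an
    // OpenMP loop on the host.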
    template <typename... HDV>
    void Eval(HDV... vectors) const {
      bool on_device = device_ >= 0;

      if (on_device) {
        LaunchCUDA(func_, vectors...);
      } else {
        LaunchCPU(func_, vectors...);
      }
    }

   private:
    // CUDA UnpackHDV
    template <typename T>
    Span<T> UnpackHDVOnDevice(HostDeviceVector<T>* _vec) const {
      auto span = _vec->DeviceSpan();
      return span;
    }
    template <typename T>
    Span<T const> UnpackHDVOnDevice(const HostDeviceVector<T>* _vec) const {
      auto span = _vec->ConstDeviceSpan();
      return span;
    }
    // CPU UnpackHDV
    template <typename T>
    Span<T> UnpackHDV(HostDeviceVector<T>* _vec) const {
      return Span<T> {_vec->HostPointer(),
                      static_cast<typename Span<T>::index_type>(_vec->Size())};
    }
    template <typename T>
    Span<T const> UnpackHDV(const HostDeviceVector<T>* _vec) const {
      return Span<T const> {_vec->ConstHostPointer(),
                            static_cast<typename Span<T>::index_type>(_vec->Size())};
    }
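    // Both unpack helpers return non-owning views: the device overloads hand
    // back the vector's device memory as a Span, while the host overloads
    // wrap the raw host pointer and element count. A Span built this way
    // never copies the underlying data, e.g. (illustrative sketch):
    //
    //   std::vector<float> buf(4, 1.0f);
    //   Span<float> view{buf.data(),
    //                    static_cast<Span<float>::index_type>(buf.size())};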
    // Recursive host sync: calling ConstHostPointer() forces each vector to
    // copy its data back to the host if it currently lives on the device.
    template <typename T>
    void SyncHost(const HostDeviceVector<T> *_vector) const {
      _vector->ConstHostPointer();
    }
    template <typename Head, typename... Rest>
    void SyncHost(const HostDeviceVector<Head> *_vector,
                  const HostDeviceVector<Rest> *... _vectors) const {
      _vector->ConstHostPointer();
      SyncHost(_vectors...);
    }
    // Recursive unpack for sharding: assign every vector to the target device.
    template <typename T>
    void UnpackShard(int device, const HostDeviceVector<T> *vector) const {
      vector->SetDevice(device);
    }
    template <typename Head, typename... Rest>
    void UnpackShard(int device,
                     const HostDeviceVector<Head> *_vector,
                     const HostDeviceVector<Rest> *... _vectors) const {
      _vector->SetDevice(device);
      UnpackShard(device, _vectors...);
    }
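    // Note: the overload pairs above emulate a loop over a parameter pack via
    // recursion, the usual C++11/14 idiom. Under C++17 a fold expression
    // would express the same thing directly, e.g. (sketch):
    //
    //   template <typename... T>
    //   void UnpackShard(int device,
    //                    const HostDeviceVector<T> *... vectors) const {
    //     (vectors->SetDevice(device), ...);
    //   }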

#if defined(__CUDACC__)
    template <typename std::enable_if<CompiledWithCuda>::type* = nullptr,
              typename... HDV>
    void LaunchCUDA(Functor _func, HDV*... _vectors) const {
      if (shard_) {
        UnpackShard(device_, _vectors...);
      }

      size_t range_size = *range_.end() - *range_.begin();

      // Extract index to deal with possible old OpenMP.
      // This deals with situations like the multi-class setting, where
      // a granularity factor is applied to the data vector.
      size_t shard_size = range_size;
      Range shard_range {0, static_cast<Range::DifferenceType>(shard_size)};
      dh::safe_cuda(cudaSetDevice(device_));
      // One thread per index; the grid count is derived from the end of the
      // range, rounded up to a whole number of blocks.
      const int kGrids =
          static_cast<int>(DivRoundUp(*(range_.end()), kBlockThreads));
      if (kGrids == 0) {
        return;
      }
      detail::LaunchCUDAKernel<<<kGrids, kBlockThreads>>>(  // NOLINT
          _func, shard_range, UnpackHDVOnDevice(_vectors)...);
    }
#else

    template <typename std::enable_if<!CompiledWithCuda>::type* = nullptr,
              typename... HDV>
    void LaunchCUDA(Functor _func, HDV*... _vectors) const {
      LOG(FATAL) << "Not part of device code. WITH_CUDA: " << WITH_CUDA();
    }
#endif  // defined(__CUDACC__)

    template <typename... HDV>
    void LaunchCPU(Functor func, HDV*... vectors) const {
      omp_ulong end = static_cast<omp_ulong>(*(range_.end()));
      dmlc::OMPException omp_exc;
      SyncHost(vectors...);
#pragma omp parallel for schedule(static)
      for (omp_ulong idx = 0; idx < end; ++idx) {
        omp_exc.Run(func, idx, UnpackHDV(vectors)...);
      }
      omp_exc.Rethrow();
    }
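    // Note: dmlc::OMPException is used because a C++ exception must not
    // propagate out of an OpenMP structured block. Run() executes the functor
    // and captures anything it throws; Rethrow() re-raises the captured
    // exception on the calling thread once the parallel region has ended.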

   private:
    /*! \brief Functor applied to each index. */
    Functor func_;
    /*! \brief Range of indices the functor is applied over. */
    Range range_;
    /*! \brief Whether to assign the input vectors to the device. */
    bool shard_;
    int device_;
  };

 public:
  /*! \brief Initialize a Transform object. */
  template <typename Functor>
  static Evaluator<Functor> Init(Functor func, Range const range,
                                 int device,
                                 bool const shard = true) {
    return Evaluator<Functor> {func, std::move(range), device, shard};
  }
};
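
// Usage sketch (illustrative, not part of this header): the functor must be
// callable on both host and device, hence the XGBOOST_DEVICE marker; the
// vector sizes, device ordinal and lambda body below are assumptions.
//
//   HostDeviceVector<float> out(16), in(16, 1.0f);
//   Transform<>::Init(
//       [] XGBOOST_DEVICE (size_t i, Span<float> o, Span<float const> a) {
//         o[i] = a[i] * 2.0f;
//       },
//       Range{0, 16}, /*device=*/0)
//       .Eval(&out, &in);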

}  // namespace common
}  // namespace xgboost

#endif  // XGBOOST_COMMON_TRANSFORM_H_