transform.h
#ifndef XGBOOST_COMMON_TRANSFORM_H_
#define XGBOOST_COMMON_TRANSFORM_H_

#include <dmlc/omp.h>
#include <dmlc/common.h>

#include <xgboost/data.h>
#include <utility>
#include <vector>
#include <type_traits>  // enable_if

#include "xgboost/host_device_vector.h"
#include "xgboost/span.h"

#include "common.h"

#if defined (__CUDACC__)
#include "device_helpers.cuh"
#endif  // defined (__CUDACC__)

namespace xgboost {
namespace common {

constexpr size_t kBlockThreads = 256;

namespace detail {

#if defined(__CUDACC__)
template <typename Functor, typename... SpanType>
__global__ void LaunchCUDAKernel(Functor _func, Range _range,
                                 SpanType... _spans) {
  for (auto i : dh::GridStrideRange(*_range.begin(), *_range.end())) {
    _func(i, _spans...);
  }
}
#endif  // defined(__CUDACC__)

}  // namespace detail

/*! \brief Do Transformation on HostDeviceVectors.
 *
 *  Dispatches a functor either to a CUDA kernel or to an OpenMP loop on the
 *  host, depending on the device ordinal passed to Init().
 */
template <bool CompiledWithCuda = WITH_CUDA()>
class Transform {
 private:
  template <typename Functor>
  struct Evaluator {
   public:
    Evaluator(Functor func, Range range, int device, bool shard) :
        func_(func), range_{std::move(range)},
        shard_{shard},
        device_{device} {}

    /*! \brief Evaluate the functor with pointers to HostDeviceVector
     *  arguments.  Runs on the device when the device ordinal is
     *  non-negative, otherwise on the host. */
    template <typename... HDV>
    void Eval(HDV... vectors) const {
      bool on_device = device_ >= 0;

      if (on_device) {
        LaunchCUDA(func_, vectors...);
      } else {
        LaunchCPU(func_, vectors...);
      }
    }

   private:
    // CUDA UnpackHDV
    template <typename T>
    Span<T> UnpackHDVOnDevice(HostDeviceVector<T>* _vec) const {
      auto span = _vec->DeviceSpan();
      return span;
    }
    template <typename T>
    Span<T const> UnpackHDVOnDevice(const HostDeviceVector<T>* _vec) const {
      auto span = _vec->ConstDeviceSpan();
      return span;
    }
    // CPU UnpackHDV
    template <typename T>
    Span<T> UnpackHDV(HostDeviceVector<T>* _vec) const {
      return Span<T> {_vec->HostPointer(),
                      static_cast<typename Span<T>::index_type>(_vec->Size())};
    }
    template <typename T>
    Span<T const> UnpackHDV(const HostDeviceVector<T>* _vec) const {
      return Span<T const> {_vec->ConstHostPointer(),
                            static_cast<typename Span<T>::index_type>(_vec->Size())};
    }
    // Recursive sync host
    template <typename T>
    void SyncHost(const HostDeviceVector<T> *_vector) const {
      _vector->ConstHostPointer();
    }
    template <typename Head, typename... Rest>
    void SyncHost(const HostDeviceVector<Head> *_vector,
                  const HostDeviceVector<Rest> *... _vectors) const {
      _vector->ConstHostPointer();
      SyncHost(_vectors...);
    }
    // Recursive unpack for Shard.
    template <typename T>
    void UnpackShard(int device, const HostDeviceVector<T> *vector) const {
      vector->SetDevice(device);
    }
    template <typename Head, typename... Rest>
    void UnpackShard(int device,
                     const HostDeviceVector<Head> *_vector,
                     const HostDeviceVector<Rest> *... _vectors) const {
      _vector->SetDevice(device);
      UnpackShard(device, _vectors...);
    }

#if defined(__CUDACC__)
    template <typename std::enable_if<CompiledWithCuda>::type* = nullptr,
              typename... HDV>
    void LaunchCUDA(Functor _func, HDV*... _vectors) const {
      if (shard_) {
        UnpackShard(device_, _vectors...);
      }

      size_t range_size = *range_.end() - *range_.begin();

      // Extract index to deal with possible old OpenMP.
      // This deals with situation like multi-class setting where
      // granularity is used in data vector.
      size_t shard_size = range_size;
      Range shard_range {0, static_cast<Range::DifferenceType>(shard_size)};
      dh::safe_cuda(cudaSetDevice(device_));
      const int kGrids =
          static_cast<int>(DivRoundUp(*(range_.end()), kBlockThreads));
      if (kGrids == 0) {
        return;
      }
      detail::LaunchCUDAKernel<<<kGrids, kBlockThreads>>>(  // NOLINT
          _func, shard_range, UnpackHDVOnDevice(_vectors)...);
    }
#else
    // Stub compiled when the translation unit is not processed by nvcc;
    // reaching it at runtime is a programming error.
    template <typename std::enable_if<!CompiledWithCuda>::type* = nullptr,
              typename... HDV>
    void LaunchCUDA(Functor _func, HDV*...) const {
      // Remove unused parameter compiler warning.
      (void) _func;

      LOG(FATAL) << "Not part of device code. WITH_CUDA: " << WITH_CUDA();
    }
#endif  // defined(__CUDACC__)

    template <typename... HDV>
    void LaunchCPU(Functor func, HDV*... vectors) const {
      omp_ulong end = static_cast<omp_ulong>(*(range_.end()));
      dmlc::OMPException omp_exc;
      SyncHost(vectors...);
#pragma omp parallel for schedule(static)
      for (omp_ulong idx = 0; idx < end; ++idx) {
        omp_exc.Run(func, idx, UnpackHDV(vectors)...);
      }
      omp_exc.Rethrow();
    }

   private:
    /*! \brief Callable object applied to every index. */
    Functor func_;
    /*! \brief Index range the functor is evaluated over. */
    Range range_;
    /*! \brief Whether SetDevice should be called on the input vectors. */
    bool shard_;
    int device_;
  };

 public:
  /*!
   * \brief Initialize a Transform object.
   *
   * \param func   A callable object, taking a size_t index followed by a set
   *               of Span arguments.
   * \param range  Index range the functor is evaluated over.
   * \param device Device ordinal; a negative value selects the CPU path.
   * \param shard  Whether SetDevice should be called on the input vectors.
   */
  template <typename Functor>
  static Evaluator<Functor> Init(Functor func, Range const range,
                                 int device,
                                 bool const shard = true) {
    return Evaluator<Functor> {func, std::move(range), device, shard};
  }
};

}  // namespace common
}  // namespace xgboost

#endif  // XGBOOST_COMMON_TRANSFORM_H_
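A rough usage sketch (not part of the header): the function name ScaleInPlace, the doubling lambda, and the include paths below are made up for illustration. The functor is assumed to be annotated with XGBOOST_DEVICE (from xgboost/base.h) so the same lambda can be compiled for both the OpenMP and the CUDA path; a negative device ordinal selects the CPU path.

#include "xgboost/base.h"                // XGBOOST_DEVICE
#include "xgboost/host_device_vector.h"
#include "transform.h"                   // adjust path to where this header lives

// Doubles every element of `data`, on the host or on `device`.
void ScaleInPlace(xgboost::HostDeviceVector<float>* data, int device) {
  auto n = static_cast<xgboost::common::Range::DifferenceType>(data->Size());
  xgboost::common::Transform<>::Init(
      [] XGBOOST_DEVICE(size_t idx, xgboost::common::Span<float> values) {
        values[idx] *= 2.0f;             // invoked once per index in [0, n)
      },
      xgboost::common::Range{0, n}, device)
      .Eval(data);                       // pass HostDeviceVector pointers
}

The objective and metric implementations in the code base use the same pattern, typically passing several HostDeviceVectors to Eval() at once so the functor receives one Span per vector.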