/*!
 * Copyright 2018 XGBoost contributors
 */
#ifndef XGBOOST_COMMON_TRANSFORM_H_
#define XGBOOST_COMMON_TRANSFORM_H_

#include <xgboost/base.h>  // omp_ulong, XGBOOST_DEVICE

#include <utility>         // std::move
#include <type_traits>     // std::enable_if

#include "common.h"        // Range, GPUSet, WITH_CUDA
#include "host_device_vector.h"
#include "span.h"

#if defined (__CUDACC__)
#include "device_helpers.cuh"
#endif  // defined (__CUDACC__)

namespace xgboost {
namespace common {

constexpr size_t kBlockThreads = 256;

namespace detail {

#if defined(__CUDACC__)
// Evaluate _func over _range with a grid-stride loop, so a fixed grid size
// covers the whole range regardless of the launch configuration.
template <typename Functor, typename... SpanType>
__global__ void LaunchCUDAKernel(Functor _func, Range _range,
                                 SpanType... _spans) {
  for (auto i : dh::GridStrideRange(*_range.begin(), *_range.end())) {
    _func(i, _spans...);
  }
}
#endif  // defined(__CUDACC__)

}  // namespace detail
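// Illustration only (not part of the original header): the Functor passed to
// LaunchCUDAKernel receives the global element index followed by one Span per
// vector.  A minimal sketch, with the hypothetical names `out` and `in`:
//
//   struct Square {
//     XGBOOST_DEVICE void operator()(size_t i,
//                                    Span<float> out,
//                                    Span<float const> in) const {
//       out[i] = in[i] * in[i];
//     }
//   };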
/*!
 * \brief Do transformation on HostDeviceVectors.
 *
 * \tparam CompiledWithCuda A bool parameter used to distinguish compilation
 *    trajectories (CPU vs. CUDA); users do not need to set it.
 */
template <bool CompiledWithCuda = WITH_CUDA()>
class Transform {
 private:
  template <typename Functor>
  struct Evaluator {
   public:
    Evaluator(Functor func, Range range, GPUSet devices, bool shard)
        : func_(func), range_{std::move(range)}, shard_{shard},
          distribution_{GPUDistribution::Block(devices)} {}
    Evaluator(Functor func, Range range, GPUDistribution dist, bool shard)
        : func_(func), range_{std::move(range)}, shard_{shard},
          distribution_{std::move(dist)} {}
    /*!
     * \brief Evaluate the functor with pointers to HostDeviceVectors.
     *
     * \tparam HDV    HostDeviceVector pointer types.
     * \param vectors Pointers to HostDeviceVectors; each is handed to the
     *                functor as a Span, in the same order.
     */
    template <typename... HDV>
    void Eval(HDV... vectors) const {
      bool on_device = !distribution_.IsEmpty();

      if (on_device) {
        LaunchCUDA(func_, vectors...);
      } else {
        LaunchCPU(func_, vectors...);
      }
    }
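    // Dispatch sketch (illustrative; `f`, `n`, `v`, and `devices` are
    // hypothetical): an empty device set selects the host path, a non-empty
    // one the CUDA path.
    //
    //   Transform<>::Init(f, Range{0, n}, GPUSet::Empty()).Eval(&v);  // CPU
    //   Transform<>::Init(f, Range{0, n}, devices).Eval(&v);          // GPU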
   private:
    // Unpack a HostDeviceVector into a device Span (CUDA path).
    template <typename T>
    Span<T> UnpackHDV(HostDeviceVector<T>* _vec, int _device) const {
      return _vec->DeviceSpan(_device);
    }
    template <typename T>
    Span<T const> UnpackHDV(const HostDeviceVector<T>* _vec, int _device) const {
      return _vec->ConstDeviceSpan(_device);
    }
    // Unpack a HostDeviceVector into a host Span (CPU path).
    template <typename T>
    Span<T> UnpackHDV(HostDeviceVector<T>* _vec) const {
      return Span<T> {_vec->HostPointer(),
            static_cast<typename Span<T>::index_type>(_vec->Size())};
    }
    template <typename T>
    Span<T const> UnpackHDV(const HostDeviceVector<T>* _vec) const {
      return Span<T const> {_vec->ConstHostPointer(),
            static_cast<typename Span<T>::index_type>(_vec->Size())};
    }
    // Recursively shard each vector across the given distribution.
    template <typename T>
    void UnpackShard(GPUDistribution dist,
                     const HostDeviceVector<T>* _vector) const {
      _vector->Shard(dist);
    }
    template <typename Head, typename... Rest>
    void UnpackShard(GPUDistribution dist,
                     const HostDeviceVector<Head>* _vector,
                     const HostDeviceVector<Rest>*... _vectors) const {
      _vector->Shard(dist);
      UnpackShard(dist, _vectors...);
    }
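    // Illustration (hypothetical `v0`/`v1`): for two vectors the recursion
    // expands to
    //   UnpackShard(dist, v0, v1);  // shards v0, recurses on the tail
    //   UnpackShard(dist, v1);      // base case: shards v1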
#if defined(__CUDACC__)
    template <typename std::enable_if<CompiledWithCuda>::type* = nullptr,
              typename... HDV>
    void LaunchCUDA(Functor _func, HDV*... _vectors) const {
      if (shard_) {
        UnpackShard(distribution_, _vectors...);
      }

      GPUSet devices = distribution_.Devices();
      size_t range_size = *range_.end() - *range_.begin();

      // Extract plain integer bounds for compatibility with older OpenMP.
      size_t device_beg = *(devices.begin());
      size_t device_end = *(devices.end());
#pragma omp parallel for schedule(static, 1) if (devices.Size() > 1)
      for (omp_ulong device = device_beg; device < device_end; ++device) {  // NOLINT
        // Only the shard size is needed here; the device index selects the shard.
        Range shard_range {0, static_cast<Range::DifferenceType>(
            distribution_.ShardSize(range_size, devices.Index(device)))};
        dh::safe_cuda(cudaSetDevice(device));
        const int GRID_SIZE =
            static_cast<int>(dh::DivRoundUp(*(range_.end()), kBlockThreads));
        detail::LaunchCUDAKernel<<<GRID_SIZE, kBlockThreads>>>(
            _func, shard_range, UnpackHDV(_vectors, device)...);
      }
    }
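    // Launch-shape example (illustrative): with *range_.end() == 1000 and
    // kBlockThreads == 256, GRID_SIZE = DivRoundUp(1000, 256) = 4, i.e. four
    // blocks of 256 threads; the grid-stride loop in LaunchCUDAKernel covers
    // any remainder between grid coverage and shard size.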
#else
    /*! \brief Dummy function defined when compiling without CUDA. */
    template <typename std::enable_if<!CompiledWithCuda>::type* = nullptr,
              typename... HDV>
    void LaunchCUDA(Functor _func, HDV*... _vectors) const {
      LOG(FATAL) << "Not part of device code. WITH_CUDA: " << WITH_CUDA();
    }
#endif  // defined(__CUDACC__)

    template <typename... HDV>
    void LaunchCPU(Functor func, HDV*... vectors) const {
      omp_ulong end = static_cast<omp_ulong>(*(range_.end()));
#pragma omp parallel for schedule(static)
      for (omp_ulong idx = 0; idx < end; ++idx) {  // NOLINT
        func(idx, UnpackHDV(vectors)...);
      }
    }
   private:
    /*! \brief Callable object. */
    Functor func_;
    /*! \brief Range object specifying the parallel index range. */
    Range range_;
    /*! \brief Whether sharding of the vectors is required. */
    bool shard_;
    GPUDistribution distribution_;
  };

 public:
  /*!
   * \brief Initialize a Transform object.
   *
   * \tparam Functor A callable object type.
   * \return An Evaluator with a single method, Eval.
   *
   * \param func    A callable object accepting a size_t thread index,
   *                followed by a set of Spans unpacked from the vectors
   *                passed to Eval.
   * \param range   Range object specifying the parallel index range.
   * \param devices GPUSet specifying the GPUs to use; pass GPUSet::Empty()
   *                to run on CPU.
   * \param shard   Whether the HostDeviceVectors need to be sharded.
   */
  template <typename Functor>
  static Evaluator<Functor> Init(Functor func, Range range,
                                 GPUSet devices,
                                 bool const shard = true) {
    return Evaluator<Functor> {func, std::move(range), std::move(devices), shard};
  }
  template <typename Functor>
  static Evaluator<Functor> Init(Functor func, Range range,
                                 GPUDistribution dist,
                                 bool const shard = true) {
    return Evaluator<Functor> {func, std::move(range), std::move(dist), shard};
  }
};
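// A minimal usage sketch (illustrative; `AddOne` and `vec` are assumptions,
// not part of this header).  The functor receives the element index followed
// by one Span per vector handed to Eval:
//
//   struct AddOne {
//     XGBOOST_DEVICE void operator()(size_t i, Span<bst_float> v) const {
//       v[i] += 1.0f;
//     }
//   };
//
//   HostDeviceVector<bst_float> vec {1.0f, 2.0f, 3.0f};
//   Transform<>::Init(AddOne{}, Range{0, 3}, GPUSet::Empty())
//       .Eval(&vec);  // empty GPUSet -> runs on the host via LaunchCPU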
}  // namespace common
}  // namespace xgboost

#endif  // XGBOOST_COMMON_TRANSFORM_H_