52 #ifndef XGBOOST_COMMON_HOST_DEVICE_VECTOR_H_ 53 #define XGBOOST_COMMON_HOST_DEVICE_VECTOR_H_ 55 #include <dmlc/logging.h> 59 #include <initializer_list> 69 #include <thrust/device_ptr.h> 77 void SetCudaSetDeviceHandler(
void (*handler)(
int));
92 : devices_(devices), granularity_(1), overlap_(0) {}
96 std::vector<size_t> &&offsets)
97 : devices_(devices), granularity_(granularity), overlap_(overlap),
98 offsets_(std::move(offsets)) {}
110 return GPUDistribution(devices, granularity, 0, std::vector<size_t>());
121 bool const res = a.devices_ == b.devices_ &&
122 a.granularity_ == b.granularity_ &&
123 a.overlap_ == b.overlap_ &&
124 a.offsets_ == b.offsets_;
137 if (size == 0) {
return 0; }
138 if (offsets_.size() > 0) {
140 CHECK_EQ(offsets_.back(), size);
141 return offsets_.at(index);
144 size_t begin = std::min(index * Portion(size), size);
145 begin = begin > size ? size : begin;
150 if (size == 0) {
return 0; }
151 if (offsets_.size() > 0) {
153 CHECK_EQ(offsets_.back(), size);
154 return offsets_.at(index + 1) - offsets_.at(index) +
155 (index == devices_.
Size() - 1 ? overlap_ : 0);
157 size_t portion = Portion(size);
158 size_t begin = std::min(index * portion, size);
159 size_t end = std::min((index + 1) * portion + overlap_ * granularity_, size);
164 if (size == 0) {
return 0; }
165 return ShardSize(size, index) - (devices_.
Size() - 1 > index ? overlap_ : 0);
/*!
 * \brief Integer division a / b with the result rounded up; assumes b > 0.
 */
static size_t DivRoundUp(size_t a, size_t b) {
  // bias the numerator so any non-zero remainder bumps the quotient
  size_t const biased = a + b - 1;
  return biased / b;
}
172 static size_t RoundUp(
size_t a,
size_t b) {
return DivRoundUp(a, b) * b; }
174 size_t Portion(
size_t size)
const {
177 (std::max(static_cast<int64_t>(size - overlap_ * granularity_),
178 static_cast<int64_t>(1)),
179 devices_.
Size()), granularity_);
// Explicit shard boundaries. When non-empty, offsets_[i] is the first
// element of shard i and offsets_.back() equals the total size (checked
// by ShardStart/ShardSize); when empty, shards are derived from
// granularity_/overlap_ instead.
std::vector<size_t> offsets_;
196 return static_cast<GPUAccess>(
static_cast<int>(a) - static_cast<int>(b));
199 template <
typename T>
217 T* DevicePointer(
int device);
218 const T* ConstDevicePointer(
int device)
const;
219 const T*
DevicePointer(
int device)
const {
return ConstDevicePointer(device); }
225 size_t DeviceStart(
int device)
const;
226 size_t DeviceSize(
int device)
const;
231 thrust::device_ptr<T> tbegin(
int device);
232 thrust::device_ptr<T> tend(
int device);
233 thrust::device_ptr<const T> tcbegin(
int device)
const;
234 thrust::device_ptr<const T> tcend(
int device)
const;
235 thrust::device_ptr<const T> tbegin(
int device)
const {
236 return tcbegin(device);
238 thrust::device_ptr<const T> tend(
int device)
const {
return tcend(device); }
240 void ScatterFrom(thrust::device_ptr<const T> begin, thrust::device_ptr<const T> end);
241 void GatherTo(thrust::device_ptr<T> begin, thrust::device_ptr<T> end)
const;
246 void Copy(
const std::vector<T>& other);
247 void Copy(std::initializer_list<T> other);
249 std::vector<T>& HostVector();
250 const std::vector<T>& ConstHostVector()
const;
251 const std::vector<T>&
HostVector()
const {
return ConstHostVector(); }
253 bool HostCanAccess(
GPUAccess access)
const;
254 bool DeviceCanAccess(
int device,
GPUAccess access)
const;
260 void Shard(
GPUSet devices)
const;
267 void Resize(
size_t new_size, T v = T());
275 #endif // XGBOOST_COMMON_HOST_DEVICE_VECTOR_H_ Definition: host_device_vector.h:87
size_t ShardSize(size_t size, size_t index) const
Definition: host_device_vector.h:149
GPUDistribution(GPUSet devices=GPUSet::Empty())
Definition: host_device_vector.h:91
const T * DevicePointer(int device) const
Definition: host_device_vector.h:219
Definition: host_device_vector.h:200
static GPUDistribution Empty()
Definition: host_device_vector.h:101
GPUSet Devices() const
Definition: host_device_vector.h:132
common::Span< const T > DeviceSpan(int device) const
Definition: host_device_vector.h:216
T * HostPointer()
Definition: host_device_vector.h:221
size_t ShardProperSize(size_t size, size_t index) const
Definition: host_device_vector.h:163
const T * HostPointer() const
Definition: host_device_vector.h:223
const T * ConstHostPointer() const
Definition: host_device_vector.h:222
const std::vector< T > & HostVector() const
Definition: host_device_vector.h:251
span class implementation, based on ISO++20 span<T>. The interface should be the same.
Definition: span.h:109
Definition: host_device_vector.h:190
Definition: host_device_vector.h:80
Definition: host_device_vector.h:192
size_t Size() const
Definition: common.h:196
Definition: host_device_vector.h:190
size_t ShardStart(size_t size, int index) const
Definition: host_device_vector.h:136
friend bool operator!=(const GPUDistribution &a, const GPUDistribution &b)
Definition: host_device_vector.h:128
static GPUDistribution Block(GPUSet devices)
Definition: host_device_vector.h:103
static GPUDistribution Explicit(GPUSet devices, std::vector< size_t > offsets)
Definition: host_device_vector.h:116
namespace of xgboost
Definition: base.h:79
static GPUDistribution Granular(GPUSet devices, int granularity)
Definition: host_device_vector.h:109
GPUAccess
Definition: host_device_vector.h:189
bool IsFixedSize() const
Definition: host_device_vector.h:168
bool IsEmpty() const
Definition: host_device_vector.h:134
bool IsEmpty() const
Definition: common.h:227
static GPUDistribution Overlap(GPUSet devices, int overlap)
Definition: host_device_vector.h:105
GPUAccess operator-(GPUAccess a, GPUAccess b)
Definition: host_device_vector.h:195
static GPUSet Empty()
Definition: common.h:156
friend bool operator==(const GPUDistribution &a, const GPUDistribution &b)
Definition: host_device_vector.h:120