host_device_vector.h
#ifndef XGBOOST_COMMON_HOST_DEVICE_VECTOR_H_
#define XGBOOST_COMMON_HOST_DEVICE_VECTOR_H_

#include <dmlc/logging.h>

#include <algorithm>
#include <cstdlib>
#include <initializer_list>
#include <utility>
#include <vector>

#include "common.h"
#include "span.h"

// Only include thrust-related files if host_device_vector.h
// is included from a .cu file.
#ifdef __CUDACC__
#include <thrust/device_ptr.h>
#endif  // __CUDACC__

namespace xgboost {

#ifdef __CUDACC__
// Sets a function to call instead of cudaSetDevice();
// only added for testing.
void SetCudaSetDeviceHandler(void (*handler)(int));
#endif  // __CUDACC__
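
// Testing sketch (illustrative; assumes a CUDA translation unit): a no-op
// handler can be installed so tests do not actually switch devices:
//
//   #ifdef __CUDACC__
//   xgboost::SetCudaSetDeviceHandler([](int) {});
//   #endif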

template <typename T> struct HostDeviceVectorImpl;

// Distribution for a HostDeviceVector: it specifies the devices the vector is
// distributed across, whether shards overlap (i.e. whether a device also holds
// copies of elements owned by neighbouring shards), and the granularity of
// splitting. It may also specify explicit per-device boundaries, in which case
// the size of the vector cannot be changed.
class GPUDistribution {
  template<typename T> friend struct HostDeviceVectorImpl;

 public:
  explicit GPUDistribution(GPUSet devices = GPUSet::Empty())
      : devices_(devices), granularity_(1), overlap_(0) {}

 private:
  GPUDistribution(GPUSet devices, int granularity, int overlap,
                  std::vector<size_t> &&offsets)
      : devices_(devices), granularity_(granularity), overlap_(overlap),
        offsets_(std::move(offsets)) {}

 public:
  static GPUDistribution Empty() { return GPUDistribution(); }

  static GPUDistribution Block(GPUSet devices) { return GPUDistribution(devices); }

  static GPUDistribution Overlap(GPUSet devices, int overlap) {
    return GPUDistribution(devices, 1, overlap, std::vector<size_t>());
  }

  static GPUDistribution Granular(GPUSet devices, int granularity) {
    return GPUDistribution(devices, granularity, 0, std::vector<size_t>());
  }

  // NOTE(rongou): Explicit offsets don't necessarily cover the whole vector. Sections before the
  // first shard or after the last shard may be on host only. This windowing is done in the GPU
  // predictor for external memory support.
  static GPUDistribution Explicit(GPUSet devices, std::vector<size_t> offsets) {
    return GPUDistribution(devices, 1, 0, std::move(offsets));
  }
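
  // Example (illustrative): with two devices and a vector of 1000 elements,
  // Explicit(devices, {0, 500, 1000}) places elements [0, 500) on the first
  // device and [500, 1000) on the second; the offsets vector has one entry per
  // device plus a final entry that must equal the vector size (see the
  // CHECK_EQ in ShardStart()/ShardSize() below).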

  friend bool operator==(const GPUDistribution& a, const GPUDistribution& b) {
    bool const res = a.devices_ == b.devices_ &&
                     a.granularity_ == b.granularity_ &&
                     a.overlap_ == b.overlap_ &&
                     a.offsets_ == b.offsets_;
    return res;
  }

  friend bool operator!=(const GPUDistribution& a, const GPUDistribution& b) {
    return !(a == b);
  }

  GPUSet Devices() const { return devices_; }

  bool IsEmpty() const { return devices_.IsEmpty(); }

  size_t ShardStart(size_t size, int index) const {
    if (size == 0) { return 0; }
    if (offsets_.size() > 0) {
      // explicit offsets are provided
      CHECK_EQ(offsets_.back(), size);
      return offsets_.at(index);
    }
    // no explicit offsets: split evenly, clamping the boundary to the size
    return std::min(index * Portion(size), size);
  }

  // Size of the shard on the given device, including any overlap with the
  // next shard.
  size_t ShardSize(size_t size, size_t index) const {
    if (size == 0) { return 0; }
    if (offsets_.size() > 0) {
      // explicit offsets are provided
      CHECK_EQ(offsets_.back(), size);
      return offsets_.at(index + 1) - offsets_.at(index) +
             (index == devices_.Size() - 1 ? overlap_ : 0);
    }
    size_t portion = Portion(size);
    size_t begin = std::min(index * portion, size);
    size_t end = std::min((index + 1) * portion + overlap_ * granularity_, size);
    return end - begin;
  }

  // Size of the shard excluding the overlap (only the last shard keeps it).
  size_t ShardProperSize(size_t size, size_t index) const {
    if (size == 0) { return 0; }
    return ShardSize(size, index) - (devices_.Size() - 1 > index ? overlap_ : 0);
  }

  bool IsFixedSize() const { return !offsets_.empty(); }

 private:
  static size_t DivRoundUp(size_t a, size_t b) { return (a + b - 1) / b; }
  static size_t RoundUp(size_t a, size_t b) { return DivRoundUp(a, b) * b; }

  // Per-device portion before overlap: the vector (minus the overlap) is
  // split evenly between the devices, rounded up to the granularity.
  size_t Portion(size_t size) const {
    return RoundUp(
        DivRoundUp(std::max(static_cast<int64_t>(size - overlap_ * granularity_),
                            static_cast<int64_t>(1)),
                   devices_.Size()),
        granularity_);
  }

  GPUSet devices_;
  int granularity_;
  int overlap_;
  // explicit offsets for the GPU parts, if any
  std::vector<size_t> offsets_;
};
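
// Usage sketch for GPUDistribution (illustrative; assumes GPUSet::Range(0, 2)
// from common.h selects devices 0 and 1):
//
//   GPUSet devices = GPUSet::Range(0, 2);
//   GPUDistribution block = GPUDistribution::Block(devices);
//   // With 10 elements, each shard covers 5:
//   //   block.ShardStart(10, 1) == 5 and block.ShardSize(10, 0) == 5.
//   GPUDistribution overlap = GPUDistribution::Overlap(devices, 1);
//   // Each shard except the last also holds one element of its neighbour:
//   //   overlap.ShardSize(10, 0) == 6, overlap.ShardProperSize(10, 0) == 5.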

enum GPUAccess {
  kNone, kRead,
  // write implies read
  kWrite
};

inline GPUAccess operator-(GPUAccess a, GPUAccess b) {
  return static_cast<GPUAccess>(static_cast<int>(a) - static_cast<int>(b));
}
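
// Since the enumerators are ordered (kNone < kRead < kWrite), subtracting
// access levels yields the remaining level, e.g. kWrite - kRead == kRead.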

template <typename T>
class HostDeviceVector {
 public:
  explicit HostDeviceVector(size_t size = 0, T v = T(),
                            const GPUDistribution &distribution = GPUDistribution());
  HostDeviceVector(std::initializer_list<T> init,
                   const GPUDistribution &distribution = GPUDistribution());
  explicit HostDeviceVector(const std::vector<T>& init,
                            const GPUDistribution &distribution = GPUDistribution());
  ~HostDeviceVector();
  HostDeviceVector(const HostDeviceVector<T>&);
  HostDeviceVector<T>& operator=(const HostDeviceVector<T>&);
  size_t Size() const;
  GPUSet Devices() const;
  const GPUDistribution& Distribution() const;
  common::Span<T> DeviceSpan(int device);
  common::Span<const T> ConstDeviceSpan(int device) const;
  common::Span<const T> DeviceSpan(int device) const { return ConstDeviceSpan(device); }
  T* DevicePointer(int device);
  const T* ConstDevicePointer(int device) const;
  const T* DevicePointer(int device) const { return ConstDevicePointer(device); }

  T* HostPointer() { return HostVector().data(); }
  const T* ConstHostPointer() const { return ConstHostVector().data(); }
  const T* HostPointer() const { return ConstHostPointer(); }

  size_t DeviceStart(int device) const;
  size_t DeviceSize(int device) const;

  // Only define functions returning device_ptr
  // if host_device_vector.h is included from a .cu file.
#ifdef __CUDACC__
  thrust::device_ptr<T> tbegin(int device);  // NOLINT
  thrust::device_ptr<T> tend(int device);  // NOLINT
  thrust::device_ptr<const T> tcbegin(int device) const;  // NOLINT
  thrust::device_ptr<const T> tcend(int device) const;  // NOLINT
  thrust::device_ptr<const T> tbegin(int device) const {  // NOLINT
    return tcbegin(device);
  }
  thrust::device_ptr<const T> tend(int device) const { return tcend(device); }  // NOLINT

  void ScatterFrom(thrust::device_ptr<const T> begin, thrust::device_ptr<const T> end);
  void GatherTo(thrust::device_ptr<T> begin, thrust::device_ptr<T> end) const;
#endif  // __CUDACC__
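
  // Usage sketch from a .cu file (illustrative; assumes device `d` is one of
  // the devices the vector is sharded on, and <thrust/sort.h> is included):
  //
  //   thrust::sort(vec.tbegin(d), vec.tend(d));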

  void Fill(T v);
  void Copy(const HostDeviceVector<T>& other);
  void Copy(const std::vector<T>& other);
  void Copy(std::initializer_list<T> other);

  std::vector<T>& HostVector();
  const std::vector<T>& ConstHostVector() const;
  const std::vector<T>& HostVector() const { return ConstHostVector(); }

  bool HostCanAccess(GPUAccess access) const;
  bool DeviceCanAccess(int device, GPUAccess access) const;

  // Specify the memory distribution of this vector.
  void Shard(const GPUDistribution &distribution) const;
  void Shard(GPUSet devices) const;

  // Change the memory distribution of this vector.
  void Reshard(const GPUDistribution &distribution);

  void Resize(size_t new_size, T v = T());

 private:
  HostDeviceVectorImpl<T>* impl_;
};
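
// Usage sketch (illustrative; assumes GPUSet::Range(0, 1) from common.h
// selects device 0):
//
//   HostDeviceVector<float> vec(100, 0.0f);  // 100 zeros, host-resident
//   vec.HostVector()[0] = 1.0f;              // mutate on the host
//   vec.Shard(GPUSet::Range(0, 1));          // distribute to device 0
//   common::Span<const float> s = vec.ConstDeviceSpan(0);  // device-side view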

}  // namespace xgboost

#endif  // XGBOOST_COMMON_HOST_DEVICE_VECTOR_H_