xgboost
linalg.h
1 
6 #ifndef XGBOOST_LINALG_H_
7 #define XGBOOST_LINALG_H_
8 
9 #include <dmlc/endian.h>
10 #include <xgboost/base.h>
11 #include <xgboost/generic_parameters.h>
12 #include <xgboost/host_device_vector.h>
13 #include <xgboost/json.h>
14 #include <xgboost/span.h>
15 
16 #include <algorithm>
17 #include <cassert>
18 #include <limits>
19 #include <string>
20 #include <tuple>
21 #include <type_traits>
22 #include <utility>
23 #include <vector>
24 
25 // decouple it from xgboost.
26 #ifndef LINALG_HD
27 #if defined(__CUDA__) || defined(__NVCC__)
28 #define LINALG_HD __host__ __device__
29 #else
30 #define LINALG_HD
31 #endif // defined (__CUDA__) || defined(__NVCC__)
32 #endif // LINALG_HD
33 
34 namespace xgboost {
35 namespace linalg {
36 namespace detail {
37 
38 struct ArrayInterfaceHandler {
39  template <typename T>
40  static constexpr char TypeChar() {
41  return (std::is_floating_point<T>::value
42  ? 'f'
43  : (std::is_integral<T>::value ? (std::is_signed<T>::value ? 'i' : 'u') : '\0'));
44  }
45 };
46 
47 template <size_t dim, typename S, typename Head, size_t D>
48 constexpr size_t Offset(S (&strides)[D], size_t n, Head head) {
49  static_assert(dim < D, "");
50  return n + head * strides[dim];
51 }
52 
53 template <size_t dim, typename S, size_t D, typename Head, typename... Tail>
54 constexpr std::enable_if_t<sizeof...(Tail) != 0, size_t> Offset(S (&strides)[D], size_t n,
55  Head head, Tail &&...rest) {
56  static_assert(dim < D, "");
57  return Offset<dim + 1>(strides, n + (head * strides[dim]), std::forward<Tail>(rest)...);
58 }
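// Editorial example (not part of the upstream header): Offset folds a multi-dimensional
// index into a linear offset using the stride array. For a 2x3 row-major matrix the
// strides are {3, 1}, so element (1, 2) lands at 1 * 3 + 2 * 1 = 5:
//
//   constexpr size_t kStrides[2] = {3, 1};
//   static_assert(Offset<0>(kStrides, 0, 1, 2) == 5, "");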
59 
60 template <int32_t D, bool f_array = false>
61 constexpr void CalcStride(size_t const (&shape)[D], size_t (&stride)[D]) {
62  if (f_array) {
63  stride[0] = 1;
64  for (int32_t s = 1; s < D; ++s) {
65  stride[s] = shape[s - 1] * stride[s - 1];
66  }
67  } else {
68  stride[D - 1] = 1;
69  for (int32_t s = D - 2; s >= 0; --s) {
70  stride[s] = shape[s + 1] * stride[s + 1];
71  }
72  }
73 }
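// Editorial note (not part of the upstream header): for shape {2, 3, 4}, CalcStride
// produces {12, 4, 1} in the default row-major (C) layout and {1, 2, 6} when f_array is
// true (column-major / F layout).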
74 
75 struct AllTag {};
76 
77 struct IntTag {};
78 
79 template <typename I>
80 struct RangeTag {
81  I beg;
82  I end;
83  constexpr size_t Size() const { return end - beg; }
84 };
85 
89 template <typename T>
90 constexpr int32_t CalcSliceDim() {
91  return std::is_same<T, IntTag>::value ? 0 : 1;
92 }
93 
94 template <typename T, typename... S>
95 constexpr std::enable_if_t<sizeof...(S) != 0, int32_t> CalcSliceDim() {
96  return CalcSliceDim<T>() + CalcSliceDim<S...>();
97 }
98 
99 template <int32_t D>
100 constexpr size_t CalcSize(size_t (&shape)[D]) {
101  size_t size = 1;
102  for (auto d : shape) {
103  size *= d;
104  }
105  return size;
106 }
107 
108 template <typename S>
109 using RemoveCRType = std::remove_const_t<std::remove_reference_t<S>>;
110 
111 template <typename S>
112 using IndexToTag = std::conditional_t<std::is_integral<RemoveCRType<S>>::value, IntTag, S>;
113 
114 template <int32_t n, typename Fn>
115 LINALG_HD constexpr auto UnrollLoop(Fn fn) {
116 #if defined __CUDA_ARCH__
117 #pragma unroll n
118 #endif // defined __CUDA_ARCH__
119  for (int32_t i = 0; i < n; ++i) {
120  fn(i);
121  }
122 }
123 
124 template <typename T>
125 int32_t NativePopc(T v) {
126  int c = 0;
127  for (; v != 0; v &= v - 1) c++;
128  return c;
129 }
130 
131 inline LINALG_HD int Popc(uint32_t v) {
132 #if defined(__CUDA_ARCH__)
133  return __popc(v);
134 #elif defined(__GNUC__) || defined(__clang__)
135  return __builtin_popcount(v);
136 #elif defined(_MSC_VER)
137  return __popcnt(v);
138 #else
139  return NativePopc(v);
140 #endif // compiler
141 }
142 
143 inline LINALG_HD int Popc(uint64_t v) {
144 #if defined(__CUDA_ARCH__)
145  return __popcll(v);
146 #elif defined(__GNUC__) || defined(__clang__)
147  return __builtin_popcountll(v);
148 #elif defined(_MSC_VER)
149  return __popcnt64(v);
150 #else
151  return NativePopc(v);
152 #endif // compiler
153 }
154 
155 template <class T, std::size_t N, std::size_t... Idx>
156 constexpr auto Arr2Tup(T (&arr)[N], std::index_sequence<Idx...>) {
157  return std::make_tuple(arr[Idx]...);
158 }
159 
160 template <class T, std::size_t N>
161 constexpr auto Arr2Tup(T (&arr)[N]) {
162  return Arr2Tup(arr, std::make_index_sequence<N>{});
163 }
164 
165 // Unsigned integer division optimization inspired by the CIndexer in cupy. Division is
166 // slow on both CPU and GPU, especially for 64-bit integers, so we first avoid 64-bit
167 // arithmetic when the index fits in 32 bits, then avoid division when an extent is a power of 2.
168 template <typename I, int32_t D>
169 LINALG_HD auto UnravelImpl(I idx, common::Span<size_t const, D> shape) {
170  size_t index[D]{0};
171  static_assert(std::is_signed<decltype(D)>::value,
172  "Don't change the type without changing the for loop.");
173  for (int32_t dim = D; --dim > 0;) {
174  auto s = static_cast<std::remove_const_t<std::remove_reference_t<I>>>(shape[dim]);
175  if (s & (s - 1)) {
176  auto t = idx / s;
177  index[dim] = idx - t * s;
178  idx = t;
179  } else { // power of 2
180  index[dim] = idx & (s - 1);
181  idx >>= Popc(s - 1);
182  }
183  }
184  index[0] = idx;
185  return Arr2Tup(index);
186 }
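// Editorial walk-through (not part of the upstream header): unraveling idx = 5 for shape
// {2, 3} visits dim 1 first. Since 3 is not a power of two, plain division is used:
// index[1] = 5 % 3 = 2 and idx becomes 5 / 3 = 1, which ends up in index[0], giving the
// tuple (1, 2). Had the extent been 4 (a power of two), the `idx & (s - 1)` mask and a
// shift by Popc(s - 1) bits would have replaced the division.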
187 
188 template <size_t dim, typename I, int32_t D>
189 void ReshapeImpl(size_t (&out_shape)[D], I s) {
190  static_assert(dim < D, "");
191  out_shape[dim] = s;
192 }
193 
194 template <size_t dim, int32_t D, typename... S, typename I,
195  std::enable_if_t<sizeof...(S) != 0> * = nullptr>
196 void ReshapeImpl(size_t (&out_shape)[D], I &&s, S &&...rest) {
197  static_assert(dim < D, "");
198  out_shape[dim] = s;
199  ReshapeImpl<dim + 1>(out_shape, std::forward<S>(rest)...);
200 }
201 
202 template <typename Fn, typename Tup, size_t... I>
203 LINALG_HD decltype(auto) constexpr Apply(Fn &&f, Tup &&t, std::index_sequence<I...>) {
204  return f(std::get<I>(t)...);
205 }
206 
213 template <typename Fn, typename Tup>
214 LINALG_HD decltype(auto) constexpr Apply(Fn &&f, Tup &&t) {
215  constexpr auto kSize = std::tuple_size<Tup>::value;
216  return Apply(std::forward<Fn>(f), std::forward<Tup>(t), std::make_index_sequence<kSize>{});
217 }
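// Editorial example (not part of the upstream header): Apply is a C++14-friendly stand-in
// for std::apply; Apply(f, std::make_tuple(1, 2.0)) calls f(1, 2.0).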
218 
222 template <class...>
223 struct Conjunction : std::true_type {};
224 template <class B1>
225 struct Conjunction<B1> : B1 {};
226 template <class B1, class... Bn>
227 struct Conjunction<B1, Bn...> : std::conditional_t<bool(B1::value), Conjunction<Bn...>, B1> {};
228 
229 template <typename... Index>
230 using IsAllIntegral = Conjunction<std::is_integral<std::remove_reference_t<Index>>...>;
231 
232 template <typename... Index>
233 using EnableIfIntegral = std::enable_if_t<IsAllIntegral<Index...>::value>;
234 } // namespace detail
235 
239 constexpr detail::AllTag All() { return {}; }
243 template <typename I>
244 constexpr detail::RangeTag<I> Range(I beg, I end) {
245  return {beg, end};
246 }
247 
261 template <typename T, int32_t kDim>
262 class TensorView {
263  public:
264  using ShapeT = size_t[kDim];
265  using StrideT = ShapeT;
266 
267  private:
268  StrideT stride_{1};
269  ShapeT shape_{0};
270  common::Span<T> data_;
271  T *ptr_{nullptr}; // pointer of data_ to avoid bound check.
272 
273  size_t size_{0};
274  int32_t device_{-1};
275 
276  // Unlike `Tensor`, the data_ can have arbitrary size since this is just a view.
277  LINALG_HD void CalcSize() {
278  if (data_.empty()) {
279  size_ = 0;
280  } else {
281  size_ = detail::CalcSize(shape_);
282  }
283  }
284 
285  template <size_t old_dim, size_t new_dim, int32_t D, typename I>
286  LINALG_HD size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D],
287  detail::RangeTag<I> &&range) const {
288  static_assert(new_dim < D, "");
289  static_assert(old_dim < kDim, "");
290  new_stride[new_dim] = stride_[old_dim];
291  new_shape[new_dim] = range.Size();
292  assert(static_cast<decltype(shape_[old_dim])>(range.end) <= shape_[old_dim]);
293 
294  auto offset = stride_[old_dim] * range.beg;
295  return offset;
296  }
300  template <size_t old_dim, size_t new_dim, int32_t D, typename I, typename... S>
301  LINALG_HD size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D],
302  detail::RangeTag<I> &&range, S &&...slices) const {
303  static_assert(new_dim < D, "");
304  static_assert(old_dim < kDim, "");
305  new_stride[new_dim] = stride_[old_dim];
306  new_shape[new_dim] = range.Size();
307  assert(static_cast<decltype(shape_[old_dim])>(range.end) <= shape_[old_dim]);
308 
309  auto offset = stride_[old_dim] * range.beg;
310  return MakeSliceDim<old_dim + 1, new_dim + 1, D>(new_shape, new_stride,
311  std::forward<S>(slices)...) +
312  offset;
313  }
314 
315  template <size_t old_dim, size_t new_dim, int32_t D>
316  LINALG_HD size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D], detail::AllTag) const {
317  static_assert(new_dim < D, "");
318  static_assert(old_dim < kDim, "");
319  new_stride[new_dim] = stride_[old_dim];
320  new_shape[new_dim] = shape_[old_dim];
321  return 0;
322  }
326  template <size_t old_dim, size_t new_dim, int32_t D, typename... S>
327  LINALG_HD size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D], detail::AllTag,
328  S &&...slices) const {
329  static_assert(new_dim < D, "");
330  static_assert(old_dim < kDim, "");
331  new_stride[new_dim] = stride_[old_dim];
332  new_shape[new_dim] = shape_[old_dim];
333  return MakeSliceDim<old_dim + 1, new_dim + 1, D>(new_shape, new_stride,
334  std::forward<S>(slices)...);
335  }
336 
337  template <size_t old_dim, size_t new_dim, int32_t D, typename Index>
338  LINALG_HD size_t MakeSliceDim(DMLC_ATTRIBUTE_UNUSED size_t new_shape[D],
339  DMLC_ATTRIBUTE_UNUSED size_t new_stride[D], Index i) const {
340  static_assert(old_dim < kDim, "");
341  return stride_[old_dim] * i;
342  }
346  template <size_t old_dim, size_t new_dim, int32_t D, typename Index, typename... S>
347  LINALG_HD std::enable_if_t<std::is_integral<Index>::value, size_t> MakeSliceDim(
348  size_t new_shape[D], size_t new_stride[D], Index i, S &&...slices) const {
349  static_assert(old_dim < kDim, "");
350  auto offset = stride_[old_dim] * i;
351  auto res =
352  MakeSliceDim<old_dim + 1, new_dim, D>(new_shape, new_stride, std::forward<S>(slices)...);
353  return res + offset;
354  }
355 
356  public:
357  size_t constexpr static kValueSize = sizeof(T);
358  size_t constexpr static kDimension = kDim;
359 
360  public:
372  template <typename I, int32_t D>
373  LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], int32_t device)
374  : data_{data}, ptr_{data_.data()}, device_{device} {
375  static_assert(D > 0 && D <= kDim, "Invalid shape.");
376  // shape
377  detail::UnrollLoop<D>([&](auto i) { shape_[i] = shape[i]; });
378  for (auto i = D; i < kDim; ++i) {
379  shape_[i] = 1;
380  }
381  // stride
382  detail::CalcStride(shape_, stride_);
383  // size
384  this->CalcSize();
385  }
386 
391  template <typename I, int32_t D>
392  LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], I const (&stride)[D],
393  int32_t device)
394  : data_{data}, ptr_{data_.data()}, device_{device} {
395  static_assert(D == kDim, "Invalid shape & stride.");
396  detail::UnrollLoop<D>([&](auto i) {
397  shape_[i] = shape[i];
398  stride_[i] = stride[i];
399  });
400  this->CalcSize();
401  }
402 
403  template <
404  typename U,
405  std::enable_if_t<common::detail::IsAllowedElementTypeConversion<U, T>::value> * = nullptr>
406  LINALG_HD TensorView(TensorView<U, kDim> const &that) // NOLINT
407  : data_{that.Values()}, ptr_{data_.data()}, size_{that.Size()}, device_{that.DeviceIdx()} {
408  detail::UnrollLoop<kDim>([&](auto i) {
409  stride_[i] = that.Stride(i);
410  shape_[i] = that.Shape(i);
411  });
412  }
413 
427  template <typename... Index, detail::EnableIfIntegral<Index...> * = nullptr>
428  LINALG_HD T &operator()(Index &&...index) {
429  static_assert(sizeof...(index) <= kDim, "Invalid index.");
430  size_t offset = detail::Offset<0ul>(stride_, 0ul, std::forward<Index>(index)...);
431  assert(offset < data_.size() && "Out of bound access.");
432  return ptr_[offset];
433  }
437  template <typename... Index, detail::EnableIfIntegral<Index...> * = nullptr>
438  LINALG_HD T const &operator()(Index &&...index) const {
439  static_assert(sizeof...(index) <= kDim, "Invalid index.");
440  size_t offset = detail::Offset<0ul>(stride_, 0ul, std::forward<Index>(index)...);
441  assert(offset < data_.size() && "Out of bound access.");
442  return ptr_[offset];
443  }
444 
458  template <typename... S>
459  LINALG_HD auto Slice(S &&...slices) const {
460  static_assert(sizeof...(slices) <= kDim, "Invalid slice.");
461  int32_t constexpr kNewDim{detail::CalcSliceDim<detail::IndexToTag<S>...>()};
462  size_t new_shape[kNewDim];
463  size_t new_stride[kNewDim];
464  auto offset = MakeSliceDim<0, 0, kNewDim>(new_shape, new_stride, std::forward<S>(slices)...);
465  // ret is a different type due to changed dimension, so we can not access its private
466  // fields.
467  TensorView<T, kNewDim> ret{data_.subspan(data_.empty() ? 0 : offset), new_shape, new_stride,
468  device_};
469  return ret;
470  }
471 
472  LINALG_HD auto Shape() const { return common::Span<size_t const, kDim>{shape_}; }
476  LINALG_HD auto Shape(size_t i) const { return shape_[i]; }
477  LINALG_HD auto Stride() const { return common::Span<size_t const, kDim>{stride_}; }
481  LINALG_HD auto Stride(size_t i) const { return stride_[i]; }
482 
486  LINALG_HD size_t Size() const { return size_; }
490  LINALG_HD bool Contiguous() const {
491  return data_.size() == this->Size() || this->CContiguous() || this->FContiguous();
492  }
496  LINALG_HD bool CContiguous() const {
497  StrideT stride;
498  static_assert(std::is_same<decltype(stride), decltype(stride_)>::value, "");
499  // It's contiguous if the stride can be calculated from shape.
500  detail::CalcStride(shape_, stride);
501  return common::Span<size_t const, kDim>{stride_} == common::Span<size_t const, kDim>{stride};
502  }
506  LINALG_HD bool FContiguous() const {
507  StrideT stride;
508  static_assert(std::is_same<decltype(stride), decltype(stride_)>::value, "");
509  // It's contiguous if the stride can be calculated from shape.
510  detail::CalcStride<kDim, true>(shape_, stride);
511  return common::Span<size_t const, kDim>{stride_} == common::Span<size_t const, kDim>{stride};
512  }
516  LINALG_HD auto Values() const -> decltype(data_) const & { return data_; }
520  LINALG_HD auto DeviceIdx() const { return device_; }
521 };
522 
526 template <typename Container, typename I, int32_t D,
527  std::enable_if_t<!common::detail::IsSpan<Container>::value> * = nullptr>
528 auto MakeTensorView(Container &data, I const (&shape)[D], int32_t device) { // NOLINT
529  using T = typename Container::value_type;
530  return TensorView<T, D>{data, shape, device};
531 }
532 
533 template <typename T, typename I, int32_t D>
534 LINALG_HD auto MakeTensorView(common::Span<T> data, I const (&shape)[D], int32_t device) {
535  return TensorView<T, D>{data, shape, device};
536 }
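// Editorial usage sketch, not part of the upstream header; the function name
// ExampleTensorViewUsage is illustrative only. It builds a 2x3 host view (device
// ordinal -1) over a std::vector and demonstrates indexing and slicing with the
// API declared above.
inline void ExampleTensorViewUsage() {
  std::vector<float> data(6, 0.0f);
  size_t shape[2] = {2, 3};
  auto t = MakeTensorView(data, shape, -1);  // TensorView<float, 2>
  t(1, 2) = 42.0f;                           // element access: row 1, column 2
  auto row = t.Slice(1, All());              // 1-D view of the second row
  assert(row.Size() == 3);
  assert(row(2) == 42.0f);
  auto tail = t.Slice(All(), Range(1, 3));   // 2x2 view of the last two columns
  assert(tail.Shape(0) == 2 && tail.Shape(1) == 2);
}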
537 
541 template <size_t D>
542 LINALG_HD auto UnravelIndex(size_t idx, common::Span<size_t const, D> shape) {
543  if (idx > std::numeric_limits<uint32_t>::max()) {
544  return detail::UnravelImpl<uint64_t, D>(static_cast<uint64_t>(idx), shape);
545  } else {
546  return detail::UnravelImpl<uint32_t, D>(static_cast<uint32_t>(idx), shape);
547  }
548 }
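// Editorial note (not part of the upstream header): UnravelIndex is the inverse of the
// flattening done by detail::Offset. Given a view t with shape {2, 3}, UnravelIndex(5,
// t.Shape()) yields the multi-index tuple (1, 2); the 32-bit path is taken whenever the
// linear index fits in uint32_t.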
549 
555 template <typename T>
556 using VectorView = TensorView<T, 1>;
557 
565 template <typename T>
566 auto MakeVec(T *ptr, size_t s, int32_t device = -1) {
567  return linalg::TensorView<T, 1>{{ptr, s}, {s}, device};
568 }
569 
570 template <typename T>
571 auto MakeVec(HostDeviceVector<T> *data) {
572  return MakeVec(data->DeviceIdx() == -1 ? data->HostPointer() : data->DevicePointer(),
573  data->Size(), data->DeviceIdx());
574 }
575 
576 template <typename T>
577 auto MakeVec(HostDeviceVector<T> const *data) {
578  return MakeVec(data->DeviceIdx() == -1 ? data->ConstHostPointer() : data->ConstDevicePointer(),
579  data->Size(), data->DeviceIdx());
580 }
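// Editorial usage sketch (not part of the upstream header; the variable names are
// illustrative):
//
//   HostDeviceVector<float> storage(4);  // data lives on the host by default
//   auto vec = MakeVec(&storage);        // TensorView<float, 1> over the host pointer
//   vec(3) = 2.0f;
//
// If the vector had a device set, MakeVec would wrap the device pointer instead and the
// returned view would carry that device ordinal.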
581 
587 template <typename T>
588 using MatrixView = TensorView<T, 2>;
589 
596 template <typename T, int32_t D>
597 Json ArrayInterface(TensorView<T const, D> const &t) {
598  Json array_interface{Object{}};
599  array_interface["data"] = std::vector<Json>(2);
600  array_interface["data"][0] = Integer{reinterpret_cast<int64_t>(t.Values().data())};
601  array_interface["data"][1] = Boolean{true};
602  if (t.DeviceIdx() >= 0) {
603  // Change this once we have different CUDA stream.
604  array_interface["stream"] = Null{};
605  }
606  std::vector<Json> shape(t.Shape().size());
607  std::vector<Json> stride(t.Stride().size());
608  for (size_t i = 0; i < t.Shape().size(); ++i) {
609  shape[i] = Integer(t.Shape(i));
610  stride[i] = Integer(t.Stride(i) * sizeof(T));
611  }
612  array_interface["shape"] = Array{shape};
613  array_interface["strides"] = Array{stride};
614  array_interface["version"] = 3;
615 
616  char constexpr kT = detail::ArrayInterfaceHandler::TypeChar<T>();
617  static_assert(kT != '\0', "");
618  if (DMLC_LITTLE_ENDIAN) {
619  array_interface["typestr"] = String{"<" + (kT + std::to_string(sizeof(T)))};
620  } else {
621  array_interface["typestr"] = String{">" + (kT + std::to_string(sizeof(T)))};
622  }
623  return array_interface;
624 }
625 
629 template <typename T, int32_t D>
630 Json ArrayInterface(TensorView<T, D> const &t) {
631  TensorView<T const, D> const &as_const = t;
632  auto res = ArrayInterface(as_const);
633  res["data"][1] = Boolean{false};
634  return res;
635 }
636 
640 template <typename T, int32_t D>
641 auto ArrayInterfaceStr(TensorView<T const, D> const &t) {
642  std::string str;
643  Json::Dump(ArrayInterface(t), &str);
644  return str;
645 }
646 
647 template <typename T, int32_t D>
648 auto ArrayInterfaceStr(TensorView<T, D> const &t) {
649  std::string str;
650  Json::Dump(ArrayInterface(t), &str);
651  return str;
652 }
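// Editorial note (not part of the upstream header): for a 2x3 TensorView<float, 2> on a
// little-endian host, the generated array interface looks roughly like
//   {"data": [<address>, true], "shape": [2, 3], "strides": [12, 4], "typestr": "<f4",
//    "version": 3}
// where "strides" is in bytes (element stride times sizeof(float)) and "data"[1] is the
// read-only flag, which the non-const overload flips to false.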
653 
658 template <typename T, int32_t kDim = 5>
659 class Tensor {
660  public:
661  using ShapeT = size_t[kDim];
662  using StrideT = ShapeT;
663 
664  private:
665  HostDeviceVector<T> data_;
666  ShapeT shape_{0};
667 
668  template <typename I, std::int32_t D>
669  void Initialize(I const (&shape)[D], std::int32_t device) {
670  static_assert(D <= kDim, "Invalid shape.");
671  std::copy(shape, shape + D, shape_);
672  for (auto i = D; i < kDim; ++i) {
673  shape_[i] = 1;
674  }
675  if (device >= 0) {
676  data_.SetDevice(device);
677  data_.ConstDevicePointer(); // Pull to device;
678  }
679  CHECK_EQ(data_.Size(), detail::CalcSize(shape_));
680  }
681 
682  public:
683  Tensor() = default;
684 
691  template <typename I, int32_t D>
692  explicit Tensor(I const (&shape)[D], int32_t device)
693  : Tensor{common::Span<I const, D>{shape}, device} {}
694 
695  template <typename I, size_t D>
696  explicit Tensor(common::Span<I const, D> shape, int32_t device) {
697  // No device unroll as this is a host only function.
698  std::copy(shape.data(), shape.data() + D, shape_);
699  for (auto i = D; i < kDim; ++i) {
700  shape_[i] = 1;
701  }
702  auto size = detail::CalcSize(shape_);
703  if (device >= 0) {
704  data_.SetDevice(device);
705  }
706  data_.Resize(size);
707  if (device >= 0) {
708  data_.DevicePointer(); // Pull to device
709  }
710  }
714  template <typename It, typename I, int32_t D>
715  explicit Tensor(It begin, It end, I const (&shape)[D], int32_t device) {
716  auto &h_vec = data_.HostVector();
717  h_vec.insert(h_vec.begin(), begin, end);
718  // shape
719  this->Initialize(shape, device);
720  }
721 
722  template <typename I, int32_t D>
723  explicit Tensor(std::initializer_list<T> data, I const (&shape)[D],
724  int32_t device = Context::kCpuId) {
725  auto &h_vec = data_.HostVector();
726  h_vec = data;
727  // shape
728  this->Initialize(shape, device);
729  }
734  template <typename... Index>
735  T &operator()(Index &&...idx) {
736  return this->HostView()(std::forward<Index>(idx)...);
737  }
742  template <typename... Index>
743  T const &operator()(Index &&...idx) const {
744  return this->HostView()(std::forward<Index>(idx)...);
745  }
746 
750  TensorView<T, kDim> View(int32_t device) {
751  if (device >= 0) {
752  data_.SetDevice(device);
753  auto span = data_.DeviceSpan();
754  return {span, shape_, device};
755  } else {
756  auto span = data_.HostSpan();
757  return {span, shape_, device};
758  }
759  }
760  TensorView<T const, kDim> View(int32_t device) const {
761  if (device >= 0) {
762  data_.SetDevice(device);
763  auto span = data_.ConstDeviceSpan();
764  return {span, shape_, device};
765  } else {
766  auto span = data_.ConstHostSpan();
767  return {span, shape_, device};
768  }
769  }
770 
771  auto HostView() const { return this->View(-1); }
772  auto HostView() { return this->View(-1); }
773 
774  size_t Size() const { return data_.Size(); }
775  auto Shape() const { return common::Span<size_t const, kDim>{shape_}; }
776  auto Shape(size_t i) const { return shape_[i]; }
777 
778  HostDeviceVector<T> *Data() { return &data_; }
779  HostDeviceVector<T> const *Data() const { return &data_; }
780 
787  template <typename Fn>
788  void ModifyInplace(Fn &&fn) {
789  fn(this->Data(), common::Span<size_t, kDim>{this->shape_});
790  CHECK_EQ(this->Data()->Size(), detail::CalcSize(this->shape_))
791  << "Inconsistent size after modification.";
792  }
793 
799  template <typename... S, detail::EnableIfIntegral<S...> * = nullptr>
800  void Reshape(S &&...s) {
801  static_assert(sizeof...(S) <= kDim, "Invalid shape.");
802  detail::ReshapeImpl<0>(shape_, std::forward<S>(s)...);
803  auto constexpr kEnd = sizeof...(S);
804  static_assert(kEnd <= kDim, "Invalid shape.");
805  std::fill(shape_ + kEnd, shape_ + kDim, 1);
806  auto n = detail::CalcSize(shape_);
807  data_.Resize(n);
808  }
809 
815  template <size_t D>
816  void Reshape(common::Span<size_t const, D> shape) {
817  static_assert(D <= kDim, "Invalid shape.");
818  std::copy(shape.data(), shape.data() + D, this->shape_);
819  std::fill(shape_ + D, shape_ + kDim, 1);
820  auto n = detail::CalcSize(shape_);
821  data_.Resize(n);
822  }
823 
824  template <size_t D>
825  void Reshape(size_t (&shape)[D]) {
826  this->Reshape(common::Span<size_t const, D>{shape});
827  }
828 
832  void SetDevice(int32_t device) const { data_.SetDevice(device); }
833  int32_t DeviceIdx() const { return data_.DeviceIdx(); }
834 };
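// Editorial usage sketch, not part of the upstream header; the function name
// ExampleTensorUsage is illustrative only. It allocates a 3x4 host tensor, writes through
// a host view, then reshapes it; Reshape recomputes the total size and resizes the backing
// HostDeviceVector accordingly.
inline void ExampleTensorUsage() {
  size_t shape[2] = {3, 4};
  Tensor<float, 2> t{shape, Context::kCpuId};  // Context::kCpuId keeps the data on the host
  auto h = t.HostView();                       // TensorView<float, 2>
  h(2, 3) = 1.0f;
  t.Reshape(4, 3);                             // still 12 elements, new shape
  assert(t.Shape(0) == 4 && t.Shape(1) == 3);
  assert(t.Size() == 12);
}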
835 
836 // Only first axis is supported for now.
837 template <typename T, int32_t D>
838 void Stack(Tensor<T, D> *l, Tensor<T, D> const &r) {
839  if (r.DeviceIdx() >= 0) {
840  l->SetDevice(r.DeviceIdx());
841  }
842  l->ModifyInplace([&](HostDeviceVector<T> *data, common::Span<size_t, D> shape) {
843  for (size_t i = 1; i < D; ++i) {
844  if (shape[i] == 0) {
845  shape[i] = r.Shape(i);
846  } else {
847  CHECK_EQ(shape[i], r.Shape(i));
848  }
849  }
850  data->Extend(*r.Data());
851  shape[0] = l->Shape(0) + r.Shape(0);
852  });
853 }
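// Editorial note (not part of the upstream header): Stack concatenates along the first
// axis only; stacking a 2x3 tensor onto a 4x3 tensor yields a 6x3 tensor, and all trailing
// dimensions must match (enforced by the CHECK_EQ above). If the right-hand tensor has a
// device set, the left-hand tensor is moved to that device first.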
854 } // namespace linalg
855 } // namespace xgboost
856 
857 #if defined(LINALG_HD)
858 #undef LINALG_HD
859 #endif // defined(LINALG_HD)
860 #endif // XGBOOST_LINALG_H_