xgboost
linalg.h
1 
6 #ifndef XGBOOST_LINALG_H_
7 #define XGBOOST_LINALG_H_
8 
9 #include <dmlc/endian.h>
10 #include <xgboost/base.h>
11 #include <xgboost/context.h>
13 #include <xgboost/json.h>
14 #include <xgboost/span.h>
15 
16 #include <algorithm>
17 #include <cassert>
18 #include <cinttypes> // for int32_t
19 #include <cstddef> // for size_t
20 #include <limits>
21 #include <string>
22 #include <tuple> // for make_tuple
23 #include <type_traits>
24 #include <utility>
25 #include <vector>
26 
27 #if defined(_MSC_VER)
28 #include <intrin.h>
29 #endif // defined(_MSC_VER)
30 
31 // decouple it from xgboost.
32 #ifndef LINALG_HD
33 #if defined(__CUDA__) || defined(__NVCC__)
34 #define LINALG_HD __host__ __device__
35 #else
36 #define LINALG_HD
37 #endif // defined (__CUDA__) || defined(__NVCC__)
38 #endif // LINALG_HD
39 
40 namespace xgboost::linalg {
41 namespace detail {
42 
43 struct ArrayInterfaceHandler {
44  template <typename T>
45  static constexpr char TypeChar() {
46  return (std::is_floating_point<T>::value
47  ? 'f'
48  : (std::is_integral<T>::value ? (std::is_signed<T>::value ? 'i' : 'u') : '\0'));
49  }
50 };
51 
52 template <size_t dim, typename S, typename Head, size_t D>
53 constexpr size_t Offset(S (&strides)[D], size_t n, Head head) {
54  static_assert(dim < D);
55  return n + head * strides[dim];
56 }
57 
58 template <size_t dim, typename S, size_t D, typename Head, typename... Tail>
59 constexpr std::enable_if_t<sizeof...(Tail) != 0, size_t> Offset(S (&strides)[D], size_t n,
60  Head head, Tail &&...rest) {
61  static_assert(dim < D);
62  return Offset<dim + 1>(strides, n + (head * strides[dim]), std::forward<Tail>(rest)...);
63 }
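// Example (for illustration): with row-major strides {12, 4, 1} (shape {2, 3, 4}),
// Offset<0>(strides, 0, 1, 2, 3) accumulates 1*12 + 2*4 + 3*1 == 23, the linear
// offset of element (1, 2, 3).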
64 
65 template <int32_t D, bool f_array = false>
66 constexpr void CalcStride(size_t const (&shape)[D], size_t (&stride)[D]) {
67  if (f_array) {
68  stride[0] = 1;
69  for (int32_t s = 1; s < D; ++s) {
70  stride[s] = shape[s - 1] * stride[s - 1];
71  }
72  } else {
73  stride[D - 1] = 1;
74  for (int32_t s = D - 2; s >= 0; --s) {
75  stride[s] = shape[s + 1] * stride[s + 1];
76  }
77  }
78 }
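// Example (for illustration): for shape {2, 3, 4}, CalcStride produces the
// row-major strides {12, 4, 1}; with f_array = true it produces the column-major
// strides {1, 2, 6}.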
79 
80 struct AllTag {};
81 
82 struct IntTag {};
83 
84 template <typename I>
85 struct RangeTag {
86  I beg;
87  I end;
88  [[nodiscard]] constexpr size_t Size() const { return end - beg; }
89 };
90 
94 template <typename T>
95 constexpr int32_t CalcSliceDim() {
96  return std::is_same<T, IntTag>::value ? 0 : 1;
97 }
98 
99 template <typename T, typename... S>
100 constexpr std::enable_if_t<sizeof...(S) != 0, int32_t> CalcSliceDim() {
101  return CalcSliceDim<T>() + CalcSliceDim<S...>();
102 }
103 
104 template <int32_t D>
105 constexpr size_t CalcSize(size_t (&shape)[D]) {
106  size_t size = 1;
107  for (auto d : shape) {
108  size *= d;
109  }
110  return size;
111 }
112 
113 template <typename S>
114 using RemoveCRType = std::remove_const_t<std::remove_reference_t<S>>;
115 
116 template <typename S>
117 using IndexToTag = std::conditional_t<std::is_integral<RemoveCRType<S>>::value, IntTag, S>;
118 
119 template <int32_t n, typename Fn>
120 LINALG_HD constexpr auto UnrollLoop(Fn fn) {
121 #if defined __CUDA_ARCH__
122 #pragma unroll n
123 #endif // defined __CUDA_ARCH__
124  for (int32_t i = 0; i < n; ++i) {
125  fn(i);
126  }
127 }
128 
129 template <typename T>
130 int32_t NativePopc(T v) {
131  int c = 0;
132  for (; v != 0; v &= v - 1) c++;
133  return c;
134 }
135 
136 inline LINALG_HD int Popc(uint32_t v) {
137 #if defined(__CUDA_ARCH__)
138  return __popc(v);
139 #elif defined(__GNUC__) || defined(__clang__)
140  return __builtin_popcount(v);
141 #elif defined(_MSC_VER)
142  return __popcnt(v);
143 #else
144  return NativePopc(v);
145 #endif // compiler
146 }
147 
148 inline LINALG_HD int Popc(uint64_t v) {
149 #if defined(__CUDA_ARCH__)
150  return __popcll(v);
151 #elif defined(__GNUC__) || defined(__clang__)
152  return __builtin_popcountll(v);
153 #elif defined(_MSC_VER) && defined(_M_X64)
154  return __popcnt64(v);
155 #else
156  return NativePopc(v);
157 #endif // compiler
158 }
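// Example (for illustration): Popc(0b1011u) == 3. UnravelImpl below relies on
// Popc(s - 1) to turn a power-of-two extent s into the equivalent shift amount.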
159 
160 template <std::size_t D, typename Head>
161 LINALG_HD void IndexToArr(std::size_t (&arr)[D], Head head) {
162  static_assert(std::is_integral<std::remove_reference_t<Head>>::value, "Invalid index type.");
163  arr[D - 1] = head;
164 }
165 
169 template <std::size_t D, typename Head, typename... Rest>
170 LINALG_HD void IndexToArr(std::size_t (&arr)[D], Head head, Rest &&...index) {
171  static_assert(sizeof...(Rest) < D, "Index overflow.");
172  static_assert(std::is_integral<std::remove_reference_t<Head>>::value, "Invalid index type.");
173  arr[D - sizeof...(Rest) - 1] = head;
174  IndexToArr(arr, std::forward<Rest>(index)...);
175 }
176 
177 template <class T, std::size_t N, std::size_t... Idx>
178 constexpr auto ArrToTuple(T (&arr)[N], std::index_sequence<Idx...>) {
179  return std::make_tuple(arr[Idx]...);
180 }
181 
185 template <class T, std::size_t N>
186 constexpr auto ArrToTuple(T (&arr)[N]) {
187  return ArrToTuple(arr, std::make_index_sequence<N>{});
188 }
189 
190 // uint division optimization inspired by the CIndexer in cupy. Division is slow on both
191 // CPU and GPU, especially for 64-bit integers, so we first fall back to a 32-bit index
192 // when the value is small enough, then avoid the division entirely when the extent is a power of 2.
193 template <typename I, std::int32_t D>
194 LINALG_HD auto UnravelImpl(I idx, common::Span<size_t const, D> shape) {
195  std::size_t index[D]{0};
196  static_assert(std::is_signed<decltype(D)>::value,
197  "Don't change the type without changing the for loop.");
198  auto const sptr = shape.data();
199  for (int32_t dim = D; --dim > 0;) {
200  auto s = static_cast<std::remove_const_t<std::remove_reference_t<I>>>(sptr[dim]);
201  if (s & (s - 1)) {
202  auto t = idx / s;
203  index[dim] = idx - t * s;
204  idx = t;
205  } else { // exp of 2
206  index[dim] = idx & (s - 1);
207  idx >>= Popc(s - 1);
208  }
209  }
210  index[0] = idx;
211  return ArrToTuple(index);
212 }
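// Example (for illustration): for shape {2, 3, 4} the linear index 23 unravels to
// the multi-index (1, 2, 3); the extent 4 is a power of two, so that axis is
// handled with mask-and-shift instead of a division.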
213 
214 template <size_t dim, typename I, int32_t D>
215 void ReshapeImpl(size_t (&out_shape)[D], I s) {
216  static_assert(dim < D);
217  out_shape[dim] = s;
218 }
219 
220 template <size_t dim, int32_t D, typename... S, typename I,
221  std::enable_if_t<sizeof...(S) != 0> * = nullptr>
222 void ReshapeImpl(size_t (&out_shape)[D], I &&s, S &&...rest) {
223  static_assert(dim < D);
224  out_shape[dim] = s;
225  ReshapeImpl<dim + 1>(out_shape, std::forward<S>(rest)...);
226 }
227 
228 template <typename Fn, typename Tup, size_t... I>
229 LINALG_HD decltype(auto) constexpr Apply(Fn &&f, Tup &&t, std::index_sequence<I...>) {
230  return f(std::get<I>(t)...);
231 }
232 
239 template <typename Fn, typename Tup>
240 LINALG_HD decltype(auto) constexpr Apply(Fn &&f, Tup &&t) {
241  constexpr auto kSize = std::tuple_size<Tup>::value;
242  return Apply(std::forward<Fn>(f), std::forward<Tup>(t), std::make_index_sequence<kSize>{});
243 }
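// Example (for illustration): Apply(fn, std::make_tuple(1, 2, 3)) is equivalent to
// calling fn(1, 2, 3).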
244 
248 template <class...>
249 struct Conjunction : std::true_type {};
250 template <class B1>
251 struct Conjunction<B1> : B1 {};
252 template <class B1, class... Bn>
253 struct Conjunction<B1, Bn...>
254  : std::conditional_t<static_cast<bool>(B1::value), Conjunction<Bn...>, B1> {};
255 
256 template <typename... Index>
257 using IsAllIntegral = Conjunction<std::is_integral<std::remove_reference_t<Index>>...>;
258 
259 template <typename... Index>
260 using EnableIfIntegral = std::enable_if_t<IsAllIntegral<Index...>::value>;
261 } // namespace detail
262 
266 constexpr detail::AllTag All() { return {}; }
270 template <typename I>
271 constexpr detail::RangeTag<I> Range(I beg, I end) {
272  return {beg, end};
273 }
274 
275 enum Order : std::uint8_t {
276  kC, // Row major
277  kF, // Col major
278 };
279 
293 template <typename T, int32_t kDim>
294 class TensorView {
295  public:
296  using ShapeT = std::size_t[kDim];
297  using StrideT = ShapeT;
298 
299  using element_type = T; // NOLINT
300  using value_type = std::remove_cv_t<T>; // NOLINT
301 
302  private:
303  StrideT stride_{1};
304  ShapeT shape_{0};
305  common::Span<T> data_;
306  T *ptr_{nullptr}; // pointer of data_ to avoid bound check.
307 
308  size_t size_{0};
309  DeviceOrd device_;
310 
311  // Unlike `Tensor`, the data_ can have arbitrary size since this is just a view.
312  LINALG_HD void CalcSize() {
313  if (data_.empty()) {
314  size_ = 0;
315  } else {
316  size_ = detail::CalcSize(shape_);
317  }
318  }
319 
320  template <size_t old_dim, size_t new_dim, int32_t D, typename I>
321  LINALG_HD size_t MakeSliceDim(std::size_t new_shape[D], std::size_t new_stride[D],
322  detail::RangeTag<I> &&range) const {
323  static_assert(new_dim < D);
324  static_assert(old_dim < kDim);
325  new_stride[new_dim] = stride_[old_dim];
326  new_shape[new_dim] = range.Size();
327  assert(static_cast<decltype(shape_[old_dim])>(range.end) <= shape_[old_dim]);
328 
329  auto offset = stride_[old_dim] * range.beg;
330  return offset;
331  }
335  template <size_t old_dim, size_t new_dim, int32_t D, typename I, typename... S>
336  LINALG_HD size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D],
337  detail::RangeTag<I> &&range, S &&...slices) const {
338  static_assert(new_dim < D);
339  static_assert(old_dim < kDim);
340  new_stride[new_dim] = stride_[old_dim];
341  new_shape[new_dim] = range.Size();
342  assert(static_cast<decltype(shape_[old_dim])>(range.end) <= shape_[old_dim]);
343 
344  auto offset = stride_[old_dim] * range.beg;
345  return MakeSliceDim<old_dim + 1, new_dim + 1, D>(new_shape, new_stride,
346  std::forward<S>(slices)...) +
347  offset;
348  }
349 
350  template <size_t old_dim, size_t new_dim, int32_t D>
351  LINALG_HD size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D], detail::AllTag) const {
352  static_assert(new_dim < D);
353  static_assert(old_dim < kDim);
354  new_stride[new_dim] = stride_[old_dim];
355  new_shape[new_dim] = shape_[old_dim];
356  return 0;
357  }
361  template <size_t old_dim, size_t new_dim, int32_t D, typename... S>
362  LINALG_HD size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D], detail::AllTag,
363  S &&...slices) const {
364  static_assert(new_dim < D);
365  static_assert(old_dim < kDim);
366  new_stride[new_dim] = stride_[old_dim];
367  new_shape[new_dim] = shape_[old_dim];
368  return MakeSliceDim<old_dim + 1, new_dim + 1, D>(new_shape, new_stride,
369  std::forward<S>(slices)...);
370  }
371 
372  template <size_t old_dim, size_t new_dim, int32_t D, typename Index>
373  LINALG_HD size_t MakeSliceDim(DMLC_ATTRIBUTE_UNUSED size_t new_shape[D],
374  DMLC_ATTRIBUTE_UNUSED size_t new_stride[D], Index i) const {
375  static_assert(old_dim < kDim);
376  return stride_[old_dim] * i;
377  }
381  template <size_t old_dim, size_t new_dim, int32_t D, typename Index, typename... S>
382  LINALG_HD std::enable_if_t<std::is_integral<Index>::value, size_t> MakeSliceDim(
383  size_t new_shape[D], size_t new_stride[D], Index i, S &&...slices) const {
384  static_assert(old_dim < kDim);
385  auto offset = stride_[old_dim] * i;
386  auto res =
387  MakeSliceDim<old_dim + 1, new_dim, D>(new_shape, new_stride, std::forward<S>(slices)...);
388  return res + offset;
389  }
390 
391  public:
392  size_t constexpr static kValueSize = sizeof(T);
393  size_t constexpr static kDimension = kDim;
394 
395  public:
407  template <typename I, std::int32_t D>
408  LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], DeviceOrd device)
409  : TensorView{data, shape, device, Order::kC} {}
410 
411  template <typename I, int32_t D>
412  LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], DeviceOrd device, Order order)
413  : data_{data}, ptr_{data_.data()}, device_{device} {
414  static_assert(D > 0 && D <= kDim, "Invalid shape.");
415  // shape
416  detail::UnrollLoop<D>([&](auto i) { shape_[i] = shape[i]; });
417  for (auto i = D; i < kDim; ++i) {
418  shape_[i] = 1;
419  }
420  // stride
421  switch (order) {
422  case Order::kC: {
423  detail::CalcStride(shape_, stride_);
424  break;
425  }
426  case Order::kF: {
427  detail::CalcStride<kDim, true>(shape_, stride_);
428  break;
429  }
430  default: {
431  SPAN_CHECK(false);
432  }
433  }
434  // size
435  this->CalcSize();
436  }
437 
442  template <typename I, std::int32_t D>
443  LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], I const (&stride)[D],
444  DeviceOrd device)
445  : data_{data}, ptr_{data_.data()}, device_{device} {
446  static_assert(D == kDim, "Invalid shape & stride.");
447  detail::UnrollLoop<D>([&](auto i) {
448  shape_[i] = shape[i];
449  stride_[i] = stride[i];
450  });
451  this->CalcSize();
452  }
453 
454  template <
455  typename U,
456  std::enable_if_t<common::detail::IsAllowedElementTypeConversion<U, T>::value> * = nullptr>
457  LINALG_HD TensorView(TensorView<U, kDim> const &that) // NOLINT
458  : data_{that.Values()}, ptr_{data_.data()}, size_{that.Size()}, device_{that.Device()} {
459  detail::UnrollLoop<kDim>([&](auto i) {
460  stride_[i] = that.Stride(i);
461  shape_[i] = that.Shape(i);
462  });
463  }
464 
478  template <typename... Index, detail::EnableIfIntegral<Index...> * = nullptr>
479  LINALG_HD T &operator()(Index &&...index) {
480  static_assert(sizeof...(index) <= kDim, "Invalid index.");
481  size_t offset = detail::Offset<0ul>(stride_, 0ul, std::forward<Index>(index)...);
482  assert(offset < data_.size() && "Out of bound access.");
483  return ptr_[offset];
484  }
488  template <typename... Index, detail::EnableIfIntegral<Index...> * = nullptr>
489  LINALG_HD T const &operator()(Index &&...index) const {
490  static_assert(sizeof...(index) <= kDim, "Invalid index.");
491  size_t offset = detail::Offset<0ul>(stride_, 0ul, std::forward<Index>(index)...);
492  assert(offset < data_.size() && "Out of bound access.");
493  return ptr_[offset];
494  }
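  // Example (for illustration): for a 3-d view t, t(1, 2, 3) returns a reference to
  // the element at linear offset 1 * t.Stride(0) + 2 * t.Stride(1) + 3 * t.Stride(2).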
495 
509  template <typename... S>
510  LINALG_HD auto Slice(S &&...slices) const {
511  static_assert(sizeof...(slices) <= kDim, "Invalid slice.");
512  int32_t constexpr kNewDim{detail::CalcSliceDim<detail::IndexToTag<S>...>()};
513  size_t new_shape[kNewDim];
514  size_t new_stride[kNewDim];
515  auto offset = MakeSliceDim<0, 0, kNewDim>(new_shape, new_stride, std::forward<S>(slices)...);
516  // ret is a different type due to changed dimension, so we can not access its private
517  // fields.
518  TensorView<T, kNewDim> ret{data_.subspan(data_.empty() ? 0 : offset), new_shape, new_stride,
519  device_};
520  return ret;
521  }
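  // Example (for illustration): given a view t with shape (2, 3, 4),
  //   t.Slice(1, linalg::All(), linalg::Range(0, 2))
  // returns a TensorView<T, 2> with shape (3, 2) that shares the same underlying data.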
522 
523  LINALG_HD auto Shape() const { return common::Span<size_t const, kDim>{shape_}; }
527  LINALG_HD auto Shape(size_t i) const { return shape_[i]; }
528  LINALG_HD auto Stride() const { return common::Span<size_t const, kDim>{stride_}; }
532  LINALG_HD auto Stride(size_t i) const { return stride_[i]; }
533 
537  [[nodiscard]] LINALG_HD std::size_t Size() const { return size_; }
538  [[nodiscard]] bool Empty() const { return Size() == 0; }
542  [[nodiscard]] LINALG_HD bool Contiguous() const {
543  return data_.size() == this->Size() || this->CContiguous() || this->FContiguous();
544  }
548  [[nodiscard]] LINALG_HD bool CContiguous() const {
549  StrideT stride;
550  static_assert(std::is_same<decltype(stride), decltype(stride_)>::value);
551  // It's contiguous if the stride can be calculated from shape.
552  detail::CalcStride(shape_, stride);
553  return common::Span<size_t const, kDim>{stride_} == common::Span<size_t const, kDim>{stride};
554  }
558  [[nodiscard]] LINALG_HD bool FContiguous() const {
559  StrideT stride;
560  static_assert(std::is_same<decltype(stride), decltype(stride_)>::value);
561  // It's contiguous if the stride can be calculated from shape.
562  detail::CalcStride<kDim, true>(shape_, stride);
563  return common::Span<size_t const, kDim>{stride_} == common::Span<size_t const, kDim>{stride};
564  }
568  LINALG_HD auto Values() const -> decltype(data_) const & { return data_; }
572  LINALG_HD auto Device() const { return device_; }
573 };
574 
578 template <typename Container, typename... S,
579  std::enable_if_t<!common::detail::IsSpan<Container>::value &&
580  !std::is_pointer_v<Container>> * = nullptr>
581 auto MakeTensorView(Context const *ctx, Container &data, S &&...shape) { // NOLINT
582  using T = std::conditional_t<std::is_const_v<Container>,
583  std::add_const_t<typename Container::value_type>,
584  typename Container::value_type>;
585  std::size_t in_shape[sizeof...(S)];
586  detail::IndexToArr(in_shape, std::forward<S>(shape)...);
587  return TensorView<T, sizeof...(S)>{data, in_shape, ctx->Device()};
588 }
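// A minimal usage sketch (for illustration; assumes a CPU Context named ctx and a
// hypothetical local buffer named storage):
//   std::vector<float> storage(12);
//   auto t = MakeTensorView(&ctx, storage, 3, 4);  // TensorView<float, 2> with shape (3, 4)
//   t(0, 1) = 1.0f;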
589 
590 template <typename T, decltype(common::dynamic_extent) ext, typename... S>
591 LINALG_HD auto MakeTensorView(DeviceOrd device, common::Span<T, ext> data, S &&...shape) {
592  std::size_t in_shape[sizeof...(S)];
593  detail::IndexToArr(in_shape, std::forward<S>(shape)...);
594  return TensorView<T, sizeof...(S)>{data, in_shape, device};
595 }
596 
597 template <typename T, decltype(common::dynamic_extent) ext, typename... S>
598 auto MakeTensorView(Context const *ctx, common::Span<T, ext> data, S &&...shape) {
599  return MakeTensorView(ctx->Device(), data, std::forward<S>(shape)...);
600 }
601 
602 template <typename T, decltype(common::dynamic_extent) ext, typename... S>
603 auto MakeTensorView(Context const *ctx, Order order, common::Span<T, ext> data, S &&...shape) {
604  std::size_t in_shape[sizeof...(S)];
605  detail::IndexToArr(in_shape, std::forward<S>(shape)...);
606  return TensorView<T, sizeof...(S)>{data, in_shape, ctx->Device(), order};
607 }
608 
609 template <typename T, typename... S>
610 auto MakeTensorView(Context const *ctx, HostDeviceVector<T> *data, S &&...shape) {
611  auto span = ctx->IsCUDA() ? data->DeviceSpan() : data->HostSpan();
612  return MakeTensorView(ctx->Device(), span, std::forward<S>(shape)...);
613 }
614 
615 template <typename T, typename... S>
616 auto MakeTensorView(Context const *ctx, HostDeviceVector<T> const *data, S &&...shape) {
617  auto span = ctx->IsCUDA() ? data->ConstDeviceSpan() : data->ConstHostSpan();
618  return MakeTensorView(ctx->Device(), span, std::forward<S>(shape)...);
619 }
620 
624 template <size_t D>
625 LINALG_HD auto UnravelIndex(size_t idx, common::Span<size_t const, D> shape) {
626  if (idx > std::numeric_limits<uint32_t>::max()) {
627  return detail::UnravelImpl<uint64_t, D>(static_cast<uint64_t>(idx), shape);
628  } else {
629  return detail::UnravelImpl<uint32_t, D>(static_cast<uint32_t>(idx), shape);
630  }
631 }
632 
633 template <size_t D>
634 LINALG_HD auto UnravelIndex(size_t idx, std::size_t const (&shape)[D]) {
635  return UnravelIndex(idx, common::Span<std::size_t const, D>(shape));
636 }
637 
638 template <typename... S>
639 LINALG_HD auto UnravelIndex(std::size_t idx, S... shape) {
640  std::size_t s[sizeof...(S)];
641  detail::IndexToArr(s, shape...);
642  return UnravelIndex(idx, common::Span<std::size_t const, sizeof...(S)>(s));
643 }
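// Example (for illustration): UnravelIndex(7, 3, 4) returns the multi-index (1, 3),
// i.e. row 1, column 3 of a row-major (3, 4) array.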
644 
650 template <typename T>
651 using VectorView = TensorView<T, 1>;
652 
660 template <typename T>
661 auto MakeVec(T *ptr, size_t s, DeviceOrd device = DeviceOrd::CPU()) {
662  return linalg::TensorView<T, 1>{{ptr, s}, {s}, device};
663 }
664 
665 template <typename T>
666 auto MakeVec(HostDeviceVector<T> *data) {
667  return MakeVec(data->Device().IsCPU() ? data->HostPointer() : data->DevicePointer(), data->Size(),
668  data->Device());
669 }
670 
671 template <typename T>
672 auto MakeVec(HostDeviceVector<T> const *data) {
673  return MakeVec(data->Device().IsCPU() ? data->ConstHostPointer() : data->ConstDevicePointer(),
674  data->Size(), data->Device());
675 }
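// A minimal usage sketch (for illustration; buf is a hypothetical local buffer):
//   std::vector<float> buf(8);
//   auto vec = MakeVec(buf.data(), buf.size());  // TensorView<float, 1> on the CPU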
676 
682 template <typename T>
683 using MatrixView = TensorView<T, 2>;
684 
691 template <typename T, std::int32_t D>
692 Json ArrayInterface(TensorView<T const, D> const &t) {
693  Json array_interface{Object{}};
694  array_interface["data"] = std::vector<Json>(2);
695  array_interface["data"][0] = Integer{reinterpret_cast<int64_t>(t.Values().data())};
696  array_interface["data"][1] = Boolean{true};
697  if (t.Device().IsCUDA()) {
698  // Change this once we have different CUDA stream.
699  array_interface["stream"] = Integer{2};
700  }
701  std::vector<Json> shape(t.Shape().size());
702  std::vector<Json> stride(t.Stride().size());
703  for (size_t i = 0; i < t.Shape().size(); ++i) {
704  shape[i] = Integer(t.Shape(i));
705  stride[i] = Integer(t.Stride(i) * sizeof(T));
706  }
707  array_interface["shape"] = Array{shape};
708  array_interface["strides"] = Array{stride};
709  array_interface["version"] = 3;
710 
711  char constexpr kT = detail::ArrayInterfaceHandler::TypeChar<T>();
712  static_assert(kT != '\0');
713  if (DMLC_LITTLE_ENDIAN) {
714  array_interface["typestr"] = String{"<" + (kT + std::to_string(sizeof(T)))};
715  } else {
716  array_interface["typestr"] = String{">" + (kT + std::to_string(sizeof(T)))};
717  }
718  return array_interface;
719 }
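// Example (for illustration): for T = float on a little-endian machine the resulting
// "typestr" field is "<f4"; for T = int32_t it is "<i4".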
720 
724 template <typename T, int32_t D>
725 Json ArrayInterface(TensorView<T, D> const &t) {
726  TensorView<T const, D> const &as_const = t;
727  auto res = ArrayInterface(as_const);
728  res["data"][1] = Boolean{false};
729  return res;
730 }
731 
735 template <typename T, int32_t D>
736 auto ArrayInterfaceStr(TensorView<T const, D> const &t) {
737  std::string str;
738  Json::Dump(ArrayInterface(t), &str);
739  return str;
740 }
741 
742 template <typename T, int32_t D>
743 auto ArrayInterfaceStr(TensorView<T, D> const &t) {
744  std::string str;
745  Json::Dump(ArrayInterface(t), &str);
746  return str;
747 }
748 
749 template <typename T>
750 auto Make1dInterface(T const *vec, std::size_t len) {
751  Context ctx;
752  auto t = linalg::MakeTensorView(&ctx, common::Span{vec, len}, len);
753  auto str = linalg::ArrayInterfaceStr(t);
754  return str;
755 }
756 
761 template <typename T, int32_t kDim = 5>
762 class Tensor {
763  public:
764  using ShapeT = std::size_t[kDim];
765  using StrideT = ShapeT;
766 
767  private:
768  HostDeviceVector<T> data_;
769  ShapeT shape_{0};
770  Order order_{Order::kC};
771 
772  template <typename I, std::int32_t D>
773  void Initialize(I const (&shape)[D], DeviceOrd device) {
774  static_assert(D <= kDim, "Invalid shape.");
775  std::copy(shape, shape + D, shape_);
776  for (auto i = D; i < kDim; ++i) {
777  shape_[i] = 1;
778  }
779  if (device.IsCUDA()) {
780  data_.SetDevice(device);
781  data_.ConstDevicePointer(); // Pull to device;
782  }
783  CHECK_EQ(data_.Size(), detail::CalcSize(shape_));
784  }
785 
786  public:
787  Tensor() = default;
788 
795  template <typename I, int32_t D>
796  explicit Tensor(I const (&shape)[D], DeviceOrd device, Order order = kC)
797  : Tensor{common::Span<I const, D>{shape}, device, order} {}
798 
799  template <typename I, size_t D>
800  explicit Tensor(common::Span<I const, D> shape, DeviceOrd device, Order order = kC)
801  : order_{order} {
802  // No device unroll as this is a host only function.
803  std::copy(shape.data(), shape.data() + D, shape_);
804  for (auto i = D; i < kDim; ++i) {
805  shape_[i] = 1;
806  }
807  auto size = detail::CalcSize(shape_);
808  if (device.IsCUDA()) {
809  data_.SetDevice(device);
810  }
811  data_.Resize(size);
812  if (device.IsCUDA()) {
813  data_.DevicePointer(); // Pull to device
814  }
815  }
819  template <typename It, typename I, int32_t D>
820  explicit Tensor(It begin, It end, I const (&shape)[D], DeviceOrd device, Order order = kC)
821  : order_{order} {
822  auto &h_vec = data_.HostVector();
823  h_vec.insert(h_vec.begin(), begin, end);
824  // shape
825  this->Initialize(shape, device);
826  }
827 
828  template <typename I, int32_t D>
829  explicit Tensor(std::initializer_list<T> data, I const (&shape)[D], DeviceOrd device,
830  Order order = kC)
831  : order_{order} {
832  auto &h_vec = data_.HostVector();
833  h_vec = data;
834  // shape
835  this->Initialize(shape, device);
836  }
841  template <typename... Index>
842  T &operator()(Index &&...idx) {
843  return this->HostView()(std::forward<Index>(idx)...);
844  }
849  template <typename... Index>
850  T const &operator()(Index &&...idx) const {
851  return this->HostView()(std::forward<Index>(idx)...);
852  }
853 
857  auto View(DeviceOrd device) {
858  if (device.IsCUDA()) {
859  data_.SetDevice(device);
860  auto span = data_.DeviceSpan();
861  return TensorView<T, kDim>{span, shape_, device, order_};
862  } else {
863  auto span = data_.HostSpan();
864  return TensorView<T, kDim>{span, shape_, device, order_};
865  }
866  }
867  auto View(DeviceOrd device) const {
868  if (device.IsCUDA()) {
869  data_.SetDevice(device);
870  auto span = data_.ConstDeviceSpan();
871  return TensorView<T const, kDim>{span, shape_, device, order_};
872  } else {
873  auto span = data_.ConstHostSpan();
874  return TensorView<T const, kDim>{span, shape_, device, order_};
875  }
876  }
877 
878  auto HostView() { return this->View(DeviceOrd::CPU()); }
879  auto HostView() const { return this->View(DeviceOrd::CPU()); }
880 
881  [[nodiscard]] std::size_t Size() const { return data_.Size(); }
882  [[nodiscard]] bool Empty() const { return Size() == 0; }
883 
884  auto Shape() const { return common::Span<size_t const, kDim>{shape_}; }
885  auto Shape(size_t i) const { return shape_[i]; }
886 
887  HostDeviceVector<T> *Data() { return &data_; }
888  HostDeviceVector<T> const *Data() const { return &data_; }
889 
896  template <typename Fn>
897  void ModifyInplace(Fn &&fn) {
898  fn(this->Data(), common::Span<size_t, kDim>{this->shape_});
899  CHECK_EQ(this->Data()->Size(), detail::CalcSize(this->shape_))
900  << "Inconsistent size after modification.";
901  }
902 
908  template <typename... S, detail::EnableIfIntegral<S...> * = nullptr>
909  void Reshape(S &&...s) {
910  static_assert(sizeof...(S) <= kDim, "Invalid shape.");
911  detail::ReshapeImpl<0>(shape_, std::forward<S>(s)...);
912  auto constexpr kEnd = sizeof...(S);
913  static_assert(kEnd <= kDim, "Invalid shape.");
914  std::fill(shape_ + kEnd, shape_ + kDim, 1);
915  auto n = detail::CalcSize(shape_);
916  data_.Resize(n);
917  }
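  // Example (for illustration): for a Tensor<float, 3> t, t.Reshape(2, 3) sets the
  // shape to (2, 3, 1) and resizes the underlying storage to 6 elements.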
918 
924  template <size_t D>
925  void Reshape(common::Span<size_t const, D> shape) {
926  static_assert(D <= kDim, "Invalid shape.");
927  std::copy(shape.data(), shape.data() + D, this->shape_);
928  std::fill(shape_ + D, shape_ + kDim, 1);
929  auto n = detail::CalcSize(shape_);
930  data_.Resize(n);
931  }
932 
933  template <size_t D>
934  void Reshape(size_t (&shape)[D]) {
935  this->Reshape(common::Span<size_t const, D>{shape});
936  }
940  template <typename... S>
941  auto Slice(S &&...slices) const {
942  return this->HostView().Slice(std::forward<S>(slices)...);
943  }
947  template <typename... S>
948  auto Slice(S &&...slices) {
949  return this->HostView().Slice(std::forward<S>(slices)...);
950  }
951 
955  void SetDevice(DeviceOrd device) const { data_.SetDevice(device); }
956  [[nodiscard]] DeviceOrd Device() const { return data_.Device(); }
957 };
958 
959 template <typename T>
960 using Vector = Tensor<T, 1>;
961 
962 template <typename T>
963 using Matrix = Tensor<T, 2>;
964 
968 template <typename T, typename... Index>
969 auto Empty(Context const *ctx, Index &&...index) {
970  Tensor<T, sizeof...(Index)> t;
971  t.SetDevice(ctx->Device());
972  t.Reshape(index...);
973  return t;
974 }
975 
979 template <typename T, typename... Index>
980 auto Constant(Context const *ctx, T v, Index &&...index) {
981  Tensor<T, sizeof...(Index)> t;
982  t.SetDevice(ctx->Device());
983  t.Reshape(index...);
984  t.Data()->Fill(std::move(v));
985  return t;
986 }
987 
991 template <typename T, typename... Index>
992 auto Zeros(Context const *ctx, Index &&...index) {
993  return Constant(ctx, static_cast<T>(0), index...);
994 }
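// A minimal usage sketch (for illustration; assumes a Context named ctx):
//   auto zeros = Zeros<float>(&ctx, 2, 3);   // Tensor<float, 2> filled with 0
//   auto ones = Constant(&ctx, 1.0f, 2, 3);  // Tensor<float, 2> filled with 1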
995 
996 // Only first axis is supported for now.
997 template <typename T, int32_t D>
998 void Stack(Tensor<T, D> *l, Tensor<T, D> const &r) {
999  if (r.Device().IsCUDA()) {
1000  l->SetDevice(r.Device());
1001  }
1002  l->ModifyInplace([&](HostDeviceVector<T> *data, common::Span<size_t, D> shape) {
1003  for (size_t i = 1; i < D; ++i) {
1004  if (shape[i] == 0) {
1005  shape[i] = r.Shape(i);
1006  } else {
1007  CHECK_EQ(shape[i], r.Shape(i));
1008  }
1009  }
1010  data->Extend(*r.Data());
1011  shape[0] = l->Shape(0) + r.Shape(0);
1012  });
1013 }
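// Example (for illustration): stacking a (2, 3) tensor onto a (4, 3) tensor along
// the first axis yields a (6, 3) tensor; all trailing dimensions must match.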
1014 } // namespace xgboost::linalg
1015 
1016 #if defined(LINALG_HD)
1017 #undef LINALG_HD
1018 #endif // defined(LINALG_HD)
1019 #endif // XGBOOST_LINALG_H_
Defines configuration macros and basic types for xgboost.
Definition: host_device_vector.h:87
std::size_t Size() const
const T * ConstDevicePointer() const
void Extend(const HostDeviceVector< T > &other)
common::Span< T const > ConstHostSpan() const
Definition: host_device_vector.h:115
std::vector< T > & HostVector()
void Resize(size_t new_size, T v=T())
common::Span< const T > ConstDeviceSpan() const
T * HostPointer()
Definition: host_device_vector.h:112
common::Span< T > DeviceSpan()
common::Span< T > HostSpan()
Definition: host_device_vector.h:113
void SetDevice(DeviceOrd device) const
DeviceOrd Device() const
const T * ConstHostPointer() const
Definition: host_device_vector.h:116
Definition: json.h:113
Describes both true and false.
Definition: json.h:322
Definition: json.h:261
Definition: json.h:196
Definition: json.h:86
Data structure representing JSON format.
Definition: json.h:368
static void Dump(Json json, std::string *out, std::ios::openmode mode=std::ios::out)
Encode the JSON object. Optional parameter mode for choosing between text and binary (ubjson) output.
span class implementation, based on ISO C++20 span<T>. The interface should be the same.
Definition: span.h:422
constexpr XGBOOST_DEVICE pointer data() const __span_noexcept
Definition: span.h:547
XGBOOST_DEVICE auto subspan() const -> Span< element_type, detail::ExtentValue< Extent, Offset, Count >::value >
Definition: span.h:594
constexpr XGBOOST_DEVICE index_type size() const __span_noexcept
Definition: span.h:552
constexpr XGBOOST_DEVICE bool empty() const __span_noexcept
Definition: span.h:559
A tensor view with static type and dimension. It implements indexing and slicing.
Definition: linalg.h:294
LINALG_HD std::size_t Size() const
Number of items in the tensor.
Definition: linalg.h:537
LINALG_HD TensorView(common::Span< T > data, I const (&shape)[D], DeviceOrd device)
Create a tensor with data and shape.
Definition: linalg.h:408
std::remove_cv_t< T > value_type
Definition: linalg.h:300
T element_type
Definition: linalg.h:299
LINALG_HD bool CContiguous() const
Whether it's a c-contiguous array.
Definition: linalg.h:548
LINALG_HD auto Stride(size_t i) const
Definition: linalg.h:532
LINALG_HD TensorView(common::Span< T > data, I const (&shape)[D], DeviceOrd device, Order order)
Definition: linalg.h:412
LINALG_HD auto Shape() const
Definition: linalg.h:523
ShapeT StrideT
Definition: linalg.h:297
constexpr static size_t kDimension
Definition: linalg.h:393
LINALG_HD auto Stride() const
Definition: linalg.h:528
LINALG_HD auto Slice(S &&...slices) const
Slice the tensor. The returned tensor has inferred dim and shape. Scalar result is not supported.
Definition: linalg.h:510
LINALG_HD auto Values() const -> decltype(data_) const &
Obtain a reference to the raw data.
Definition: linalg.h:568
LINALG_HD bool Contiguous() const
Whether this is a contiguous array; both C- and F-contiguous arrays return true.
Definition: linalg.h:542
bool Empty() const
Definition: linalg.h:538
LINALG_HD T const & operator()(Index &&...index) const
Index the tensor to obtain a scalar value.
Definition: linalg.h:489
LINALG_HD TensorView(TensorView< U, kDim > const &that)
Definition: linalg.h:457
LINALG_HD T & operator()(Index &&...index)
Index the tensor to obtain a scalar value.
Definition: linalg.h:479
constexpr static size_t kValueSize
Definition: linalg.h:392
LINALG_HD bool FContiguous() const
Whether it's a f-contiguous array.
Definition: linalg.h:558
LINALG_HD auto Shape(size_t i) const
Definition: linalg.h:527
LINALG_HD TensorView(common::Span< T > data, I const (&shape)[D], I const (&stride)[D], DeviceOrd device)
Create a tensor with data, shape and strides. Don't use this constructor if stride can be calculated ...
Definition: linalg.h:443
std::size_t[kDim] ShapeT
Definition: linalg.h:296
LINALG_HD auto Device() const
Obtain the CUDA device ordinal.
Definition: linalg.h:572
A tensor storage. To use it for other functionality like slicing one needs to obtain a view first....
Definition: linalg.h:762
auto Slice(S &&...slices)
Get a host view on the slice.
Definition: linalg.h:948
bool Empty() const
Definition: linalg.h:882
Tensor(It begin, It end, I const (&shape)[D], DeviceOrd device, Order order=kC)
Definition: linalg.h:820
auto Slice(S &&...slices) const
Get a host view on the slice.
Definition: linalg.h:941
HostDeviceVector< T > const * Data() const
Definition: linalg.h:888
void Reshape(size_t(&shape)[D])
Definition: linalg.h:934
auto View(DeviceOrd device) const
Definition: linalg.h:867
auto HostView()
Definition: linalg.h:878
auto Shape(size_t i) const
Definition: linalg.h:885
HostDeviceVector< T > * Data()
Definition: linalg.h:887
T & operator()(Index &&...idx)
Index operator. Not thread safe, should not be used in performance critical region....
Definition: linalg.h:842
auto View(DeviceOrd device)
Get a TensorView for this tensor.
Definition: linalg.h:857
Tensor(common::Span< I const, D > shape, DeviceOrd device, Order order=kC)
Definition: linalg.h:800
auto Shape() const
Definition: linalg.h:884
void ModifyInplace(Fn &&fn)
Visitor function for modification that changes shape and data.
Definition: linalg.h:897
void SetDevice(DeviceOrd device) const
Set device ordinal for this tensor.
Definition: linalg.h:955
Tensor(std::initializer_list< T > data, I const (&shape)[D], DeviceOrd device, Order order=kC)
Definition: linalg.h:829
void Reshape(common::Span< size_t const, D > shape)
Reshape the tensor.
Definition: linalg.h:925
DeviceOrd Device() const
Definition: linalg.h:956
auto HostView() const
Definition: linalg.h:879
Tensor(I const (&shape)[D], DeviceOrd device, Order order=kC)
Create a tensor with shape and device ordinal. The storage is initialized automatically.
Definition: linalg.h:796
T const & operator()(Index &&...idx) const
Index operator. Not thread safe, should not be used in performance critical region....
Definition: linalg.h:850
std::size_t[kDim] ShapeT
Definition: linalg.h:764
void Reshape(S &&...s)
Reshape the tensor.
Definition: linalg.h:909
ShapeT StrideT
Definition: linalg.h:765
std::size_t Size() const
Definition: linalg.h:881
A device-and-host vector abstraction layer.
#define LINALG_HD
Definition: linalg.h:36
Definition: intrusive_ptr.h:207
constexpr std::size_t dynamic_extent
Definition: span.h:141
LINALG_HD auto UnravelImpl(I idx, common::Span< size_t const, D > shape)
Definition: linalg.h:194
void ReshapeImpl(size_t(&out_shape)[D], I s)
Definition: linalg.h:215
LINALG_HD int Popc(uint32_t v)
Definition: linalg.h:136
std::remove_const_t< std::remove_reference_t< S > > RemoveCRType
Definition: linalg.h:114
constexpr int32_t CalcSliceDim()
Calculate the dimension of sliced tensor.
Definition: linalg.h:95
constexpr LINALG_HD auto UnrollLoop(Fn fn)
Definition: linalg.h:120
std::conditional_t< std::is_integral< RemoveCRType< S > >::value, IntTag, S > IndexToTag
Definition: linalg.h:117
constexpr size_t Offset(S(&strides)[D], size_t n, Head head)
Definition: linalg.h:53
decltype(auto) constexpr LINALG_HD Apply(Fn &&f, Tup &&t, std::index_sequence< I... >)
Definition: linalg.h:229
LINALG_HD void IndexToArr(std::size_t(&arr)[D], Head head)
Definition: linalg.h:161
constexpr void CalcStride(size_t const (&shape)[D], size_t(&stride)[D])
Definition: linalg.h:66
constexpr auto ArrToTuple(T(&arr)[N], std::index_sequence< Idx... >)
Definition: linalg.h:178
int32_t NativePopc(T v)
Definition: linalg.h:130
std::enable_if_t< IsAllIntegral< Index... >::value > EnableIfIntegral
Definition: linalg.h:260
constexpr size_t CalcSize(size_t(&shape)[D])
Definition: linalg.h:105
Definition: linalg.h:40
constexpr detail::RangeTag< I > Range(I beg, I end)
Specify a range of elements in the axis for slicing.
Definition: linalg.h:271
auto Make1dInterface(T const *vec, std::size_t len)
Definition: linalg.h:750
auto MakeTensorView(Context const *ctx, Container &data, S &&...shape)
Constructor for automatic type deduction.
Definition: linalg.h:581
auto ArrayInterfaceStr(TensorView< T const, D > const &t)
Return string representation of array interface.
Definition: linalg.h:736
auto MakeVec(T *ptr, size_t s, DeviceOrd device=DeviceOrd::CPU())
Create a vector view from contiguous memory.
Definition: linalg.h:661
LINALG_HD auto UnravelIndex(size_t idx, common::Span< size_t const, D > shape)
Turns linear index into multi-dimension index. Similar to numpy unravel.
Definition: linalg.h:625
void Stack(Tensor< T, D > *l, Tensor< T, D > const &r)
Definition: linalg.h:998
auto Constant(Context const *ctx, T v, Index &&...index)
Create an array with value v.
Definition: linalg.h:980
auto Zeros(Context const *ctx, Index &&...index)
Like np.zeros, return a new array of given shape and type, filled with zeros.
Definition: linalg.h:992
auto Empty(Context const *ctx, Index &&...index)
Create an array without initialization.
Definition: linalg.h:969
constexpr detail::AllTag All()
Specify all elements in the axis for slicing.
Definition: linalg.h:266
Json ArrayInterface(TensorView< T const, D > const &t)
Array Interface defined by numpy.
Definition: linalg.h:692
Order
Definition: linalg.h:275
@ kC
Definition: linalg.h:276
@ kF
Definition: linalg.h:277
JsonInteger Integer
Definition: json.h:617
#define SPAN_CHECK(cond)
Definition: span.h:117
Runtime context for XGBoost. Contains information like threads and device.
Definition: context.h:133
DeviceOrd Device() const
Get the current device and ordinal.
Definition: context.h:208
bool IsCUDA() const
Is XGBoost running on a CUDA device?
Definition: context.h:185
A type for device ordinal. The type is packed into 32-bit for efficient use in viewing types like lin...
Definition: context.h:34
bool IsCUDA() const
Definition: context.h:44
bool IsCPU() const
Definition: context.h:45
constexpr static auto CPU()
Constructor for CPU.
Definition: context.h:64
Definition: linalg.h:80
static constexpr char TypeChar()
Definition: linalg.h:45
Definition: linalg.h:249
Definition: linalg.h:82
Definition: linalg.h:85
constexpr size_t Size() const
Definition: linalg.h:88
I end
Definition: linalg.h:87
I beg
Definition: linalg.h:86