xgboost
linalg.h
Go to the documentation of this file.
1 
6 #ifndef XGBOOST_LINALG_H_
7 #define XGBOOST_LINALG_H_
8 
9 #include <dmlc/endian.h>
10 #include <xgboost/base.h>
11 #include <xgboost/context.h>
13 #include <xgboost/json.h>
14 #include <xgboost/span.h>
15 
16 #include <algorithm>
17 #include <cassert>
18 #include <cstddef> // for size_t
19 #include <cstdint> // for int32_t
20 #include <limits>
21 #include <string>
22 #include <tuple> // for make_tuple
23 #include <type_traits>
24 #include <utility>
25 #include <vector>
26 
27 #if defined(_MSC_VER)
28 #include <intrin.h>
29 #endif // defined(_MSC_VER)
30 
31 // decouple it from xgboost.
32 #ifndef LINALG_HD
33 #if defined(__CUDA__) || defined(__NVCC__)
34 #define LINALG_HD __host__ __device__
35 #else
36 #define LINALG_HD
37 #endif // defined (__CUDA__) || defined(__NVCC__)
38 #endif // LINALG_HD
39 
40 namespace xgboost::linalg {
41 namespace detail {
42 
44  template <typename T>
45  static constexpr char TypeChar() {
46  return (std::is_floating_point_v<T>
47  ? 'f'
48  : (std::is_integral_v<T> ? (std::is_signed_v<T> ? 'i' : 'u') : '\0'));
49  }
50 };
51 
52 template <size_t dim, typename S, typename Head, size_t D>
53 constexpr size_t Offset(S (&strides)[D], size_t n, Head head) {
54  static_assert(dim < D);
55  return n + head * strides[dim];
56 }
57 
58 template <size_t dim, typename S, size_t D, typename Head, typename... Tail>
59 constexpr std::enable_if_t<sizeof...(Tail) != 0, size_t> Offset(S (&strides)[D], size_t n,
60  Head head, Tail &&...rest) {
61  static_assert(dim < D);
62  return Offset<dim + 1>(strides, n + (head * strides[dim]), std::forward<Tail>(rest)...);
63 }
64 
65 template <int32_t D, bool f_array = false>
66 constexpr void CalcStride(size_t const (&shape)[D], size_t (&stride)[D]) {
67  if (f_array) {
68  stride[0] = 1;
69  for (int32_t s = 1; s < D; ++s) {
70  stride[s] = shape[s - 1] * stride[s - 1];
71  }
72  } else {
73  stride[D - 1] = 1;
74  for (int32_t s = D - 2; s >= 0; --s) {
75  stride[s] = shape[s + 1] * stride[s + 1];
76  }
77  }
78 }
79 
80 struct AllTag {};
81 
82 struct IntTag {};
83 
84 template <typename I>
85 struct RangeTag {
86  I beg;
87  I end;
88  [[nodiscard]] constexpr size_t Size() const { return end - beg; }
89 };
90 
94 template <typename T>
95 constexpr int32_t CalcSliceDim() {
96  return std::is_same_v<T, IntTag> ? 0 : 1;
97 }
98 
99 template <typename T, typename... S>
100 constexpr std::enable_if_t<sizeof...(S) != 0, int32_t> CalcSliceDim() {
101  return CalcSliceDim<T>() + CalcSliceDim<S...>();
102 }
103 
104 template <int32_t D>
105 constexpr size_t CalcSize(size_t (&shape)[D]) {
106  size_t size = 1;
107  for (auto d : shape) {
108  size *= d;
109  }
110  return size;
111 }
112 
113 template <typename S>
114 using RemoveCRType = std::remove_const_t<std::remove_reference_t<S>>;
115 
116 template <typename S>
117 using IndexToTag = std::conditional_t<std::is_integral_v<RemoveCRType<S>>, IntTag, S>;
118 
119 template <int32_t n, typename Fn>
120 LINALG_HD constexpr auto UnrollLoop(Fn fn) {
121 #if defined __CUDA_ARCH__
122 #pragma unroll n
123 #endif // defined __CUDA_ARCH__
124  for (int32_t i = 0; i < n; ++i) {
125  fn(i);
126  }
127 }
128 
129 template <typename T>
130 int32_t NativePopc(T v) {
131  int c = 0;
132  for (; v != 0; v &= v - 1) c++;
133  return c;
134 }
135 
136 inline LINALG_HD int Popc(uint32_t v) {
137 #if defined(__CUDA_ARCH__)
138  return __popc(v);
139 #elif defined(__GNUC__) || defined(__clang__)
140  return __builtin_popcount(v);
141 #elif defined(_MSC_VER)
142  return __popcnt(v);
143 #else
144  return NativePopc(v);
145 #endif // compiler
146 }
147 
148 inline LINALG_HD int Popc(uint64_t v) {
149 #if defined(__CUDA_ARCH__)
150  return __popcll(v);
151 #elif defined(__GNUC__) || defined(__clang__)
152  return __builtin_popcountll(v);
153 #elif defined(_MSC_VER) && defined(_M_X64)
154  return __popcnt64(v);
155 #else
156  return NativePopc(v);
157 #endif // compiler
158 }
159 
160 template <std::size_t D, typename Head>
161 LINALG_HD void IndexToArr(std::size_t (&arr)[D], Head head) {
162  static_assert(std::is_integral_v<std::remove_reference_t<Head>>, "Invalid index type.");
163  arr[D - 1] = head;
164 }
165 
169 template <std::size_t D, typename Head, typename... Rest>
170 LINALG_HD void IndexToArr(std::size_t (&arr)[D], Head head, Rest &&...index) {
171  static_assert(sizeof...(Rest) < D, "Index overflow.");
172  static_assert(std::is_integral_v<std::remove_reference_t<Head>>, "Invalid index type.");
173  arr[D - sizeof...(Rest) - 1] = head;
174  IndexToArr(arr, std::forward<Rest>(index)...);
175 }
176 
177 template <class T, std::size_t N, std::size_t... Idx>
178 constexpr auto ArrToTuple(T (&arr)[N], std::index_sequence<Idx...>) {
179  return std::make_tuple(arr[Idx]...);
180 }
181 
185 template <class T, std::size_t N>
186 constexpr auto ArrToTuple(T (&arr)[N]) {
187  return ArrToTuple(arr, std::make_index_sequence<N>{});
188 }
189 
190 // uint division optimization inspired by the CIndexer in cupy. Division operation is
191 // slow on both CPU and GPU, especially 64 bit integer. So here we first try to avoid 64
192 // bit when the index is smaller, then try to avoid division when it's exp of 2.
193 template <typename I, std::int32_t D>
195  std::size_t index[D]{0};
196  static_assert(std::is_signed_v<decltype(D)>,
197  "Don't change the type without changing the for loop.");
198  auto const sptr = shape.data();
199  for (int32_t dim = D; --dim > 0;) {
200  auto s = static_cast<std::remove_const_t<std::remove_reference_t<I>>>(sptr[dim]);
201  if (s & (s - 1)) {
202  auto t = idx / s;
203  index[dim] = idx - t * s;
204  idx = t;
205  } else { // exp of 2
206  index[dim] = idx & (s - 1);
207  idx >>= Popc(s - 1);
208  }
209  }
210  index[0] = idx;
211  return ArrToTuple(index);
212 }
213 
214 template <size_t dim, typename I, int32_t D>
215 void ReshapeImpl(size_t (&out_shape)[D], I s) {
216  static_assert(dim < D);
217  out_shape[dim] = s;
218 }
219 
220 template <size_t dim, int32_t D, typename... S, typename I,
221  std::enable_if_t<sizeof...(S) != 0> * = nullptr>
222 void ReshapeImpl(size_t (&out_shape)[D], I &&s, S &&...rest) {
223  static_assert(dim < D);
224  out_shape[dim] = s;
225  ReshapeImpl<dim + 1>(out_shape, std::forward<S>(rest)...);
226 }
227 
231 template <class...>
232 struct Conjunction : std::true_type {};
233 template <class B1>
234 struct Conjunction<B1> : B1 {};
235 template <class B1, class... Bn>
236 struct Conjunction<B1, Bn...>
237  : std::conditional_t<static_cast<bool>(B1::value), Conjunction<Bn...>, B1> {};
238 
239 template <typename... Index>
241 
242 template <typename... Index>
243 using EnableIfIntegral = std::enable_if_t<IsAllIntegral<Index...>::value>;
244 } // namespace detail
245 
249 constexpr detail::AllTag All() { return {}; }
253 template <typename I>
254 constexpr detail::RangeTag<I> Range(I beg, I end) {
255  return {beg, end};
256 }
257 
258 enum Order : std::uint8_t {
259  kC, // Row major
260  kF, // Col major
261 };
262 
276 template <typename T, int32_t kDim>
277 class TensorView {
278  public:
279  using ShapeT = std::size_t[kDim];
280  using StrideT = ShapeT;
281 
282  using element_type = T; // NOLINT
283  using value_type = std::remove_cv_t<T>; // NOLINT
284 
285  private:
286  StrideT stride_{1};
287  ShapeT shape_{0};
288  common::Span<T> data_;
289  T *ptr_{nullptr}; // pointer of data_ to avoid bound check.
290 
291  size_t size_{0};
292  DeviceOrd device_;
293 
294  // Unlike `Tensor`, the data_ can have arbitrary size since this is just a view.
295  LINALG_HD void CalcSize() {
296  if (data_.empty()) {
297  size_ = 0;
298  } else {
299  size_ = detail::CalcSize(shape_);
300  }
301  }
302 
303  template <size_t old_dim, size_t new_dim, int32_t D, typename I>
304  LINALG_HD size_t MakeSliceDim(std::size_t new_shape[D], std::size_t new_stride[D],
305  detail::RangeTag<I> &&range) const {
306  static_assert(new_dim < D);
307  static_assert(old_dim < kDim);
308  new_stride[new_dim] = stride_[old_dim];
309  new_shape[new_dim] = range.Size();
310  assert(static_cast<decltype(shape_[old_dim])>(range.end) <= shape_[old_dim]);
311 
312  auto offset = stride_[old_dim] * range.beg;
313  return offset;
314  }
318  template <size_t old_dim, size_t new_dim, int32_t D, typename I, typename... S>
319  LINALG_HD size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D],
320  detail::RangeTag<I> &&range, S &&...slices) const {
321  static_assert(new_dim < D);
322  static_assert(old_dim < kDim);
323  new_stride[new_dim] = stride_[old_dim];
324  new_shape[new_dim] = range.Size();
325  assert(static_cast<decltype(shape_[old_dim])>(range.end) <= shape_[old_dim]);
326 
327  auto offset = stride_[old_dim] * range.beg;
328  return MakeSliceDim<old_dim + 1, new_dim + 1, D>(new_shape, new_stride,
329  std::forward<S>(slices)...) +
330  offset;
331  }
332 
333  template <size_t old_dim, size_t new_dim, int32_t D>
334  LINALG_HD size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D], detail::AllTag) const {
335  static_assert(new_dim < D);
336  static_assert(old_dim < kDim);
337  new_stride[new_dim] = stride_[old_dim];
338  new_shape[new_dim] = shape_[old_dim];
339  return 0;
340  }
344  template <size_t old_dim, size_t new_dim, int32_t D, typename... S>
345  LINALG_HD size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D], detail::AllTag,
346  S &&...slices) const {
347  static_assert(new_dim < D);
348  static_assert(old_dim < kDim);
349  new_stride[new_dim] = stride_[old_dim];
350  new_shape[new_dim] = shape_[old_dim];
351  return MakeSliceDim<old_dim + 1, new_dim + 1, D>(new_shape, new_stride,
352  std::forward<S>(slices)...);
353  }
354 
355  template <size_t old_dim, size_t new_dim, int32_t D, typename Index>
356  LINALG_HD size_t MakeSliceDim(DMLC_ATTRIBUTE_UNUSED size_t new_shape[D],
357  DMLC_ATTRIBUTE_UNUSED size_t new_stride[D], Index i) const {
358  static_assert(old_dim < kDim);
359  return stride_[old_dim] * i;
360  }
364  template <size_t old_dim, size_t new_dim, int32_t D, typename Index, typename... S>
365  LINALG_HD std::enable_if_t<std::is_integral_v<Index>, size_t> MakeSliceDim(
366  size_t new_shape[D], size_t new_stride[D], Index i, S &&...slices) const {
367  static_assert(old_dim < kDim);
368  auto offset = stride_[old_dim] * i;
369  auto res =
370  MakeSliceDim<old_dim + 1, new_dim, D>(new_shape, new_stride, std::forward<S>(slices)...);
371  return res + offset;
372  }
373 
374  public:
375  size_t constexpr static kValueSize = sizeof(T);
376  size_t constexpr static kDimension = kDim;
377 
378  public:
390  template <typename I, std::int32_t D>
391  LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], DeviceOrd device)
392  : TensorView{data, shape, device, Order::kC} {}
393 
394  template <typename I, int32_t D>
395  LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], DeviceOrd device, Order order)
396  : data_{data}, ptr_{data_.data()}, device_{device} {
397  static_assert(D > 0 && D <= kDim, "Invalid shape.");
398  // shape
399  detail::UnrollLoop<D>([&](auto i) { shape_[i] = shape[i]; });
400  for (auto i = D; i < kDim; ++i) {
401  shape_[i] = 1;
402  }
403  // stride
404  switch (order) {
405  case Order::kC: {
406  detail::CalcStride(shape_, stride_);
407  break;
408  }
409  case Order::kF: {
410  detail::CalcStride<kDim, true>(shape_, stride_);
411  break;
412  }
413  default: {
414  SPAN_CHECK(false);
415  }
416  }
417  // size
418  this->CalcSize();
419  }
420 
425  template <typename I, std::int32_t D>
426  LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], I const (&stride)[D],
427  DeviceOrd device)
428  : data_{data}, ptr_{data_.data()}, device_{device} {
429  static_assert(D == kDim, "Invalid shape & stride.");
430  detail::UnrollLoop<D>([&](auto i) {
431  shape_[i] = shape[i];
432  stride_[i] = stride[i];
433  });
434  this->CalcSize();
435  }
436 
437  template <
438  typename U,
439  std::enable_if_t<common::detail::IsAllowedElementTypeConversion<U, T>::value> * = nullptr>
440  LINALG_HD TensorView(TensorView<U, kDim> const &that) // NOLINT
441  : data_{that.Values()}, ptr_{data_.data()}, size_{that.Size()}, device_{that.Device()} {
442  detail::UnrollLoop<kDim>([&](auto i) {
443  stride_[i] = that.Stride(i);
444  shape_[i] = that.Shape(i);
445  });
446  }
447 
461  template <typename... Index, detail::EnableIfIntegral<Index...> * = nullptr>
462  LINALG_HD T &operator()(Index &&...index) {
463  static_assert(sizeof...(index) <= kDim, "Invalid index.");
464  size_t offset = detail::Offset<0ul>(stride_, 0ul, std::forward<Index>(index)...);
465  assert(offset < data_.size() && "Out of bound access.");
466  return ptr_[offset];
467  }
471  template <typename... Index, detail::EnableIfIntegral<Index...> * = nullptr>
472  LINALG_HD T const &operator()(Index &&...index) const {
473  static_assert(sizeof...(index) <= kDim, "Invalid index.");
474  size_t offset = detail::Offset<0ul>(stride_, 0ul, std::forward<Index>(index)...);
475  assert(offset < data_.size() && "Out of bound access.");
476  return ptr_[offset];
477  }
478 
492  template <typename... S>
493  LINALG_HD auto Slice(S &&...slices) const {
494  static_assert(sizeof...(slices) <= kDim, "Invalid slice.");
495  int32_t constexpr kNewDim{detail::CalcSliceDim<detail::IndexToTag<S>...>()};
496  size_t new_shape[kNewDim];
497  size_t new_stride[kNewDim];
498  auto offset = MakeSliceDim<0, 0, kNewDim>(new_shape, new_stride, std::forward<S>(slices)...);
499  // ret is a different type due to changed dimension, so we can not access its private
500  // fields.
501  TensorView<T, kNewDim> ret{data_.subspan(data_.empty() ? 0 : offset), new_shape, new_stride,
502  device_};
503  return ret;
504  }
505 
506  LINALG_HD auto Shape() const { return common::Span<size_t const, kDim>{shape_}; }
510  LINALG_HD auto Shape(size_t i) const { return shape_[i]; }
511  LINALG_HD auto Stride() const { return common::Span<size_t const, kDim>{stride_}; }
515  LINALG_HD auto Stride(size_t i) const { return stride_[i]; }
516 
520  [[nodiscard]] LINALG_HD std::size_t Size() const { return size_; }
521  [[nodiscard]] bool Empty() const { return Size() == 0; }
525  [[nodiscard]] LINALG_HD bool Contiguous() const {
526  return data_.size() == this->Size() || this->CContiguous() || this->FContiguous();
527  }
531  [[nodiscard]] LINALG_HD bool CContiguous() const {
532  StrideT stride;
533  static_assert(std::is_same_v<decltype(stride), decltype(stride_)>);
534  // It's contiguous if the stride can be calculated from shape.
535  detail::CalcStride(shape_, stride);
537  }
541  [[nodiscard]] LINALG_HD bool FContiguous() const {
542  StrideT stride;
543  static_assert(std::is_same_v<decltype(stride), decltype(stride_)>);
544  // It's contiguous if the stride can be calculated from shape.
545  detail::CalcStride<kDim, true>(shape_, stride);
547  }
551  LINALG_HD auto Values() const -> decltype(data_) const & { return data_; }
555  LINALG_HD auto Device() const { return device_; }
556 };
557 
561 template <typename Container, typename... S,
562  std::enable_if_t<!common::detail::IsSpan<Container>::value &&
563  !std::is_pointer_v<Container>> * = nullptr>
564 auto MakeTensorView(Context const *ctx, Container &data, S &&...shape) { // NOLINT
565  using T = std::conditional_t<std::is_const_v<Container>,
566  std::add_const_t<typename Container::value_type>,
567  typename Container::value_type>;
568  std::size_t in_shape[sizeof...(S)];
569  detail::IndexToArr(in_shape, std::forward<S>(shape)...);
570  return TensorView<T, sizeof...(S)>{data, in_shape, ctx->Device()};
571 }
572 
573 template <typename T, decltype(common::dynamic_extent) ext, typename... S>
574 LINALG_HD auto MakeTensorView(DeviceOrd device, common::Span<T, ext> data, S &&...shape) {
575  std::size_t in_shape[sizeof...(S)];
576  detail::IndexToArr(in_shape, std::forward<S>(shape)...);
577  return TensorView<T, sizeof...(S)>{data, in_shape, device};
578 }
579 
580 template <typename T, decltype(common::dynamic_extent) ext, typename... S>
581 auto MakeTensorView(Context const *ctx, common::Span<T, ext> data, S &&...shape) {
582  return MakeTensorView(ctx->Device(), data, std::forward<S>(shape)...);
583 }
584 
585 template <typename T, decltype(common::dynamic_extent) ext, typename... S>
586 auto MakeTensorView(Context const *ctx, Order order, common::Span<T, ext> data, S &&...shape) {
587  std::size_t in_shape[sizeof...(S)];
588  detail::IndexToArr(in_shape, std::forward<S>(shape)...);
589  return TensorView<T, sizeof...(S)>{data, in_shape, ctx->Device(), order};
590 }
591 
592 template <typename T, typename... S>
593 auto MakeTensorView(Context const *ctx, HostDeviceVector<T> *data, S &&...shape) {
594  auto span = ctx->IsCPU() ? data->HostSpan() : data->DeviceSpan();
595  return MakeTensorView(ctx->Device(), span, std::forward<S>(shape)...);
596 }
597 
598 template <typename T, typename... S>
599 auto MakeTensorView(Context const *ctx, HostDeviceVector<T> const *data, S &&...shape) {
600  auto span = ctx->IsCPU() ? data->ConstHostSpan() : data->ConstDeviceSpan();
601  return MakeTensorView(ctx->Device(), span, std::forward<S>(shape)...);
602 }
603 
607 template <size_t D>
609  if (idx > std::numeric_limits<uint32_t>::max()) {
610  return detail::UnravelImpl<uint64_t, D>(static_cast<uint64_t>(idx), shape);
611  } else {
612  return detail::UnravelImpl<uint32_t, D>(static_cast<uint32_t>(idx), shape);
613  }
614 }
615 
616 template <size_t D>
617 LINALG_HD auto UnravelIndex(size_t idx, std::size_t const (&shape)[D]) {
619 }
620 
621 template <typename... S>
622 LINALG_HD auto UnravelIndex(std::size_t idx, S... shape) {
623  std::size_t s[sizeof...(S)];
624  detail::IndexToArr(s, shape...);
625  return UnravelIndex(idx, common::Span<std::size_t const, sizeof...(S)>(s));
626 }
627 
633 template <typename T>
635 
643 template <typename T>
644 auto MakeVec(T *ptr, size_t s, DeviceOrd device = DeviceOrd::CPU()) {
645  return linalg::TensorView<T, 1>{{ptr, s}, {s}, device};
646 }
647 
648 template <typename T>
650  return MakeVec(data->Device().IsCPU() ? data->HostPointer() : data->DevicePointer(),
651  data->Size(), data->Device());
652 }
653 
654 template <typename T>
655 auto MakeVec(HostDeviceVector<T> const *data) {
656  return MakeVec(data->Device().IsCPU() ? data->ConstHostPointer() : data->ConstDevicePointer(),
657  data->Size(), data->Device());
658 }
659 
665 template <typename T>
667 
674 template <typename T, std::int32_t D>
676  Json array_interface{Object{}};
677  array_interface["data"] = std::vector<Json>(2);
678  array_interface["data"][0] = Integer{reinterpret_cast<int64_t>(t.Values().data())};
679  array_interface["data"][1] = Boolean{true};
680  if (t.Device().IsCUDA()) {
681  // Change this once we have different CUDA stream.
682  array_interface["stream"] = Integer{2};
683  }
684  std::vector<Json> shape(t.Shape().size());
685  std::vector<Json> stride(t.Stride().size());
686  for (size_t i = 0; i < t.Shape().size(); ++i) {
687  shape[i] = Integer(t.Shape(i));
688  stride[i] = Integer(t.Stride(i) * sizeof(T));
689  }
690  array_interface["shape"] = Array{shape};
691  array_interface["strides"] = Array{stride};
692  array_interface["version"] = 3;
693 
694  char constexpr kT = detail::ArrayInterfaceHandler::TypeChar<T>();
695  static_assert(kT != '\0');
696  if (DMLC_LITTLE_ENDIAN) {
697  array_interface["typestr"] = String{"<" + (kT + std::to_string(sizeof(T)))};
698  } else {
699  array_interface["typestr"] = String{">" + (kT + std::to_string(sizeof(T)))};
700  }
701  return array_interface;
702 }
703 
707 template <typename T, int32_t D>
709  TensorView<T const, D> const &as_const = t;
710  auto res = ArrayInterface(as_const);
711  res["data"][1] = Boolean{false};
712  return res;
713 }
714 
718 template <typename T, int32_t D>
720  std::string str;
721  Json::Dump(ArrayInterface(t), &str);
722  return str;
723 }
724 
725 template <typename T, int32_t D>
727  std::string str;
728  Json::Dump(ArrayInterface(t), &str);
729  return str;
730 }
731 
732 template <typename T>
733 auto Make1dInterface(T const *vec, std::size_t len) {
734  Context ctx;
735  auto t = linalg::MakeTensorView(&ctx, common::Span{vec, len}, len);
736  auto str = linalg::ArrayInterfaceStr(t);
737  return str;
738 }
739 
744 template <typename T, int32_t kDim = 5>
745 class Tensor {
746  public:
747  using ShapeT = std::size_t[kDim];
748  using StrideT = ShapeT;
749 
750  private:
751  HostDeviceVector<T> data_;
752  ShapeT shape_{0};
753  Order order_{Order::kC};
754 
755  template <typename I, std::int32_t D>
756  void Initialize(I const (&shape)[D], DeviceOrd device) {
757  static_assert(D <= kDim, "Invalid shape.");
758  std::copy(shape, shape + D, shape_);
759  for (auto i = D; i < kDim; ++i) {
760  shape_[i] = 1;
761  }
762  if (!device.IsCPU()) {
763  data_.SetDevice(device);
764  data_.ConstDevicePointer(); // Pull to device;
765  }
766  CHECK_EQ(data_.Size(), detail::CalcSize(shape_));
767  }
768 
769  public:
770  Tensor() = default;
771 
778  template <typename I, int32_t D>
779  explicit Tensor(I const (&shape)[D], DeviceOrd device, Order order = kC)
780  : Tensor{common::Span<I const, D>{shape}, device, order} {}
781 
782  template <typename I, size_t D>
783  explicit Tensor(common::Span<I const, D> shape, DeviceOrd device, Order order = kC)
784  : order_{order} {
785  // No device unroll as this is a host only function.
786  std::copy(shape.data(), shape.data() + D, shape_);
787  for (auto i = D; i < kDim; ++i) {
788  shape_[i] = 1;
789  }
790  auto size = detail::CalcSize(shape_);
791  if (!device.IsCPU()) {
792  data_.SetDevice(device);
793  }
794  data_.Resize(size);
795  if (!device.IsCPU()) {
796  data_.DevicePointer(); // Pull to device
797  }
798  }
802  template <typename It, typename I, int32_t D>
803  explicit Tensor(It begin, It end, I const (&shape)[D], DeviceOrd device, Order order = kC)
804  : order_{order} {
805  auto &h_vec = data_.HostVector();
806  h_vec.insert(h_vec.begin(), begin, end);
807  // shape
808  this->Initialize(shape, device);
809  }
810 
811  template <typename I, int32_t D>
812  explicit Tensor(std::initializer_list<T> data, I const (&shape)[D], DeviceOrd device,
813  Order order = kC)
814  : order_{order} {
815  auto &h_vec = data_.HostVector();
816  h_vec = data;
817  // shape
818  this->Initialize(shape, device);
819  }
824  template <typename... Index>
825  T &operator()(Index &&...idx) {
826  return this->HostView()(std::forward<Index>(idx)...);
827  }
832  template <typename... Index>
833  T const &operator()(Index &&...idx) const {
834  return this->HostView()(std::forward<Index>(idx)...);
835  }
836 
840  auto View(DeviceOrd device) {
841  if (device.IsCPU()) {
842  auto span = data_.HostSpan();
843  return TensorView<T, kDim>{span, shape_, device, order_};
844  } else {
845  data_.SetDevice(device);
846  auto span = data_.DeviceSpan();
847  return TensorView<T, kDim>{span, shape_, device, order_};
848  }
849  }
850  auto View(DeviceOrd device) const {
851  if (device.IsCPU()) {
852  auto span = data_.ConstHostSpan();
853  return TensorView<T const, kDim>{span, shape_, device, order_};
854  } else {
855  data_.SetDevice(device);
856  auto span = data_.ConstDeviceSpan();
857  return TensorView<T const, kDim>{span, shape_, device, order_};
858  }
859  }
860 
861  auto HostView() { return this->View(DeviceOrd::CPU()); }
862  auto HostView() const { return this->View(DeviceOrd::CPU()); }
863 
864  [[nodiscard]] std::size_t Size() const { return data_.Size(); }
865  [[nodiscard]] bool Empty() const { return Size() == 0; }
866 
867  auto Shape() const { return common::Span<size_t const, kDim>{shape_}; }
868  auto Shape(size_t i) const { return shape_[i]; }
869 
870  HostDeviceVector<T> *Data() { return &data_; }
871  HostDeviceVector<T> const *Data() const { return &data_; }
872 
879  template <typename Fn>
880  void ModifyInplace(Fn &&fn) {
881  fn(this->Data(), common::Span<size_t, kDim>{this->shape_});
882  CHECK_EQ(this->Data()->Size(), detail::CalcSize(this->shape_))
883  << "Inconsistent size after modification.";
884  }
885 
891  template <typename... S, detail::EnableIfIntegral<S...> * = nullptr>
892  void Reshape(S &&...s) {
893  static_assert(sizeof...(S) <= kDim, "Invalid shape.");
894  detail::ReshapeImpl<0>(shape_, std::forward<S>(s)...);
895  auto constexpr kEnd = sizeof...(S);
896  static_assert(kEnd <= kDim, "Invalid shape.");
897  std::fill(shape_ + kEnd, shape_ + kDim, 1);
898  auto n = detail::CalcSize(shape_);
899  data_.Resize(n);
900  }
901 
907  template <size_t D>
909  static_assert(D <= kDim, "Invalid shape.");
910  std::copy(shape.data(), shape.data() + D, this->shape_);
911  std::fill(shape_ + D, shape_ + kDim, 1);
912  auto n = detail::CalcSize(shape_);
913  data_.Resize(n);
914  }
915 
916  template <size_t D>
917  void Reshape(size_t (&shape)[D]) {
918  this->Reshape(common::Span<size_t const, D>{shape});
919  }
923  template <typename... S>
924  auto Slice(S &&...slices) const {
925  return this->HostView().Slice(std::forward<S>(slices)...);
926  }
930  template <typename... S>
931  auto Slice(S &&...slices) {
932  return this->HostView().Slice(std::forward<S>(slices)...);
933  }
934 
938  void SetDevice(DeviceOrd device) const { data_.SetDevice(device); }
939  [[nodiscard]] DeviceOrd Device() const { return data_.Device(); }
940 };
941 
942 template <typename T>
944 
945 template <typename T>
947 
951 template <typename T, typename... Index>
952 auto Empty(Context const *ctx, Index &&...index) {
953  Tensor<T, sizeof...(Index)> t;
954  t.SetDevice(ctx->Device());
955  t.Reshape(index...);
956  return t;
957 }
958 
962 template <typename T, typename... Index>
963 auto Constant(Context const *ctx, T v, Index &&...index) {
964  Tensor<T, sizeof...(Index)> t;
965  t.SetDevice(ctx->Device());
966  t.Reshape(index...);
967  t.Data()->Fill(std::move(v));
968  return t;
969 }
970 
974 template <typename T, typename... Index>
975 auto Zeros(Context const *ctx, Index &&...index) {
976  return Constant(ctx, static_cast<T>(0), index...);
977 }
978 
979 // Only first axis is supported for now.
980 template <typename T, int32_t D>
981 void Stack(Tensor<T, D> *l, Tensor<T, D> const &r) {
982  if (r.Device().IsCUDA()) {
983  l->SetDevice(r.Device());
984  }
986  for (size_t i = 1; i < D; ++i) {
987  if (shape[i] == 0) {
988  shape[i] = r.Shape(i);
989  } else {
990  CHECK_EQ(shape[i], r.Shape(i));
991  }
992  }
993  data->Extend(*r.Data());
994  shape[0] = l->Shape(0) + r.Shape(0);
995  });
996 }
997 } // namespace xgboost::linalg
998 
999 #if defined(LINALG_HD)
1000 #undef LINALG_HD
1001 #endif // defined(LINALG_HD)
1002 #endif // XGBOOST_LINALG_H_
Defines configuration macros and basic types for xgboost.
Definition: host_device_vector.h:87
std::size_t Size() const
const T * ConstDevicePointer() const
void Extend(const HostDeviceVector< T > &other)
common::Span< T const > ConstHostSpan() const
Definition: host_device_vector.h:116
std::vector< T > & HostVector()
common::Span< const T > ConstDeviceSpan() const
T * HostPointer()
Definition: host_device_vector.h:113
common::Span< T > DeviceSpan()
common::Span< T > HostSpan()
Definition: host_device_vector.h:114
void SetDevice(DeviceOrd device) const
DeviceOrd Device() const
void Resize(std::size_t new_size)
const T * ConstHostPointer() const
Definition: host_device_vector.h:117
Definition: json.h:116
Describes both true and false.
Definition: json.h:350
Definition: json.h:295
Definition: json.h:219
Definition: json.h:89
Data structure representing JSON format.
Definition: json.h:392
static void Dump(Json json, std::string *out, std::ios::openmode mode=std::ios::out)
Encode the JSON object. Optional parameter mode for choosing between text and binary (ubjson) output.
span class implementation, based on ISO++20 span<T>. The interface should be the same.
Definition: span.h:431
constexpr XGBOOST_DEVICE pointer data() const __span_noexcept
Definition: span.h:550
XGBOOST_DEVICE auto subspan() const -> Span< element_type, detail::ExtentValue< Extent, Offset, Count >::value >
Definition: span.h:597
constexpr XGBOOST_DEVICE index_type size() const __span_noexcept
Definition: span.h:555
constexpr XGBOOST_DEVICE bool empty() const __span_noexcept
Definition: span.h:562
A tensor view with static type and dimension. It implements indexing and slicing.
Definition: linalg.h:277
LINALG_HD std::size_t Size() const
Number of items in the tensor.
Definition: linalg.h:520
LINALG_HD TensorView(common::Span< T > data, I const (&shape)[D], DeviceOrd device)
Create a tensor with data and shape.
Definition: linalg.h:391
std::remove_cv_t< T > value_type
Definition: linalg.h:283
T element_type
Definition: linalg.h:282
LINALG_HD bool CContiguous() const
Whether it's a c-contiguous array.
Definition: linalg.h:531
LINALG_HD auto Stride(size_t i) const
Definition: linalg.h:515
LINALG_HD TensorView(common::Span< T > data, I const (&shape)[D], DeviceOrd device, Order order)
Definition: linalg.h:395
LINALG_HD auto Shape() const
Definition: linalg.h:506
ShapeT StrideT
Definition: linalg.h:280
constexpr static size_t kDimension
Definition: linalg.h:376
LINALG_HD auto Stride() const
Definition: linalg.h:511
LINALG_HD auto Slice(S &&...slices) const
Slice the tensor. The returned tensor has inferred dim and shape. Scalar result is not supported.
Definition: linalg.h:493
LINALG_HD auto Values() const -> decltype(data_) const &
Obtain a reference to the raw data.
Definition: linalg.h:551
LINALG_HD bool Contiguous() const
Whether this is a contiguous array, both C and F contiguous returns true.
Definition: linalg.h:525
bool Empty() const
Definition: linalg.h:521
LINALG_HD T const & operator()(Index &&...index) const
Index the tensor to obtain a scalar value.
Definition: linalg.h:472
LINALG_HD TensorView(TensorView< U, kDim > const &that)
Definition: linalg.h:440
LINALG_HD T & operator()(Index &&...index)
Index the tensor to obtain a scalar value.
Definition: linalg.h:462
constexpr static size_t kValueSize
Definition: linalg.h:375
LINALG_HD bool FContiguous() const
Whether it's a f-contiguous array.
Definition: linalg.h:541
LINALG_HD auto Shape(size_t i) const
Definition: linalg.h:510
LINALG_HD TensorView(common::Span< T > data, I const (&shape)[D], I const (&stride)[D], DeviceOrd device)
Create a tensor with data, shape and strides. Don't use this constructor if stride can be calculated ...
Definition: linalg.h:426
std::size_t[kDim] ShapeT
Definition: linalg.h:279
LINALG_HD auto Device() const
Obtain the CUDA device ordinal.
Definition: linalg.h:555
A tensor storage. To use it for other functionality like slicing one needs to obtain a view first....
Definition: linalg.h:745
auto Slice(S &&...slices)
Get a host view on the slice.
Definition: linalg.h:931
bool Empty() const
Definition: linalg.h:865
Tensor(It begin, It end, I const (&shape)[D], DeviceOrd device, Order order=kC)
Definition: linalg.h:803
auto Slice(S &&...slices) const
Get a host view on the slice.
Definition: linalg.h:924
HostDeviceVector< T > const * Data() const
Definition: linalg.h:871
void Reshape(size_t(&shape)[D])
Definition: linalg.h:917
auto View(DeviceOrd device) const
Definition: linalg.h:850
auto HostView()
Definition: linalg.h:861
auto Shape(size_t i) const
Definition: linalg.h:868
HostDeviceVector< T > * Data()
Definition: linalg.h:870
T & operator()(Index &&...idx)
Index operator. Not thread safe, should not be used in performance critical region....
Definition: linalg.h:825
auto View(DeviceOrd device)
Get a TensorView for this tensor.
Definition: linalg.h:840
Tensor(common::Span< I const, D > shape, DeviceOrd device, Order order=kC)
Definition: linalg.h:783
auto Shape() const
Definition: linalg.h:867
void ModifyInplace(Fn &&fn)
Visitor function for modification that changes shape and data.
Definition: linalg.h:880
void SetDevice(DeviceOrd device) const
Set device ordinal for this tensor.
Definition: linalg.h:938
Tensor(std::initializer_list< T > data, I const (&shape)[D], DeviceOrd device, Order order=kC)
Definition: linalg.h:812
void Reshape(common::Span< size_t const, D > shape)
Reshape the tensor.
Definition: linalg.h:908
DeviceOrd Device() const
Definition: linalg.h:939
auto HostView() const
Definition: linalg.h:862
Tensor(I const (&shape)[D], DeviceOrd device, Order order=kC)
Create a tensor with shape and device ordinal. The storage is initialized automatically.
Definition: linalg.h:779
T const & operator()(Index &&...idx) const
Index operator. Not thread safe, should not be used in performance critical region....
Definition: linalg.h:833
std::size_t[kDim] ShapeT
Definition: linalg.h:747
void Reshape(S &&...s)
Reshape the tensor.
Definition: linalg.h:892
ShapeT StrideT
Definition: linalg.h:748
std::size_t Size() const
Definition: linalg.h:864
A device-and-host vector abstraction layer.
#define LINALG_HD
Definition: linalg.h:36
constexpr std::size_t dynamic_extent
Definition: span.h:150
Span(std::vector< T > const &) -> Span< T const >
std::conditional_t< std::is_integral_v< RemoveCRType< S > >, IntTag, S > IndexToTag
Definition: linalg.h:117
LINALG_HD auto UnravelImpl(I idx, common::Span< size_t const, D > shape)
Definition: linalg.h:194
void ReshapeImpl(size_t(&out_shape)[D], I s)
Definition: linalg.h:215
LINALG_HD int Popc(uint32_t v)
Definition: linalg.h:136
std::remove_const_t< std::remove_reference_t< S > > RemoveCRType
Definition: linalg.h:114
constexpr int32_t CalcSliceDim()
Calculate the dimension of sliced tensor.
Definition: linalg.h:95
constexpr LINALG_HD auto UnrollLoop(Fn fn)
Definition: linalg.h:120
constexpr size_t Offset(S(&strides)[D], size_t n, Head head)
Definition: linalg.h:53
LINALG_HD void IndexToArr(std::size_t(&arr)[D], Head head)
Definition: linalg.h:161
constexpr void CalcStride(size_t const (&shape)[D], size_t(&stride)[D])
Definition: linalg.h:66
constexpr auto ArrToTuple(T(&arr)[N], std::index_sequence< Idx... >)
Definition: linalg.h:178
int32_t NativePopc(T v)
Definition: linalg.h:130
std::enable_if_t< IsAllIntegral< Index... >::value > EnableIfIntegral
Definition: linalg.h:243
constexpr size_t CalcSize(size_t(&shape)[D])
Definition: linalg.h:105
Definition: linalg.h:40
constexpr detail::RangeTag< I > Range(I beg, I end)
Specify a range of elements in the axis for slicing.
Definition: linalg.h:254
auto Make1dInterface(T const *vec, std::size_t len)
Definition: linalg.h:733
auto MakeTensorView(Context const *ctx, Container &data, S &&...shape)
Constructor for automatic type deduction.
Definition: linalg.h:564
auto ArrayInterfaceStr(TensorView< T const, D > const &t)
Return string representation of array interface.
Definition: linalg.h:719
auto MakeVec(T *ptr, size_t s, DeviceOrd device=DeviceOrd::CPU())
Create a vector view from contigious memory.
Definition: linalg.h:644
LINALG_HD auto UnravelIndex(size_t idx, common::Span< size_t const, D > shape)
Turns linear index into multi-dimension index. Similar to numpy unravel.
Definition: linalg.h:608
void Stack(Tensor< T, D > *l, Tensor< T, D > const &r)
Definition: linalg.h:981
auto Constant(Context const *ctx, T v, Index &&...index)
Create an array with value v.
Definition: linalg.h:963
auto Zeros(Context const *ctx, Index &&...index)
Like np.zeros, return a new array of given shape and type, filled with zeros.
Definition: linalg.h:975
auto Empty(Context const *ctx, Index &&...index)
Create an array without initialization.
Definition: linalg.h:952
constexpr detail::AllTag All()
Specify all elements in the axis for slicing.
Definition: linalg.h:249
Json ArrayInterface(TensorView< T const, D > const &t)
Array Interface defined by numpy.
Definition: linalg.h:675
Order
Definition: linalg.h:258
@ kC
Definition: linalg.h:259
@ kF
Definition: linalg.h:260
JsonInteger Integer
Definition: json.h:621
#define SPAN_CHECK(cond)
Definition: span.h:127
Runtime context for XGBoost. Contains information like threads and device.
Definition: context.h:133
DeviceOrd Device() const
Get the current device and ordinal.
Definition: context.h:200
bool IsCPU() const
Is XGBoost running on CPU?
Definition: context.h:173
A type for device ordinal. The type is packed into 32-bit for efficient use in viewing types like lin...
Definition: context.h:34
bool IsCUDA() const
Definition: context.h:44
bool IsCPU() const
Definition: context.h:45
constexpr static auto CPU()
Constructor for CPU.
Definition: context.h:64
Definition: linalg.h:80
static constexpr char TypeChar()
Definition: linalg.h:45
Definition: linalg.h:232
Definition: linalg.h:82
Definition: linalg.h:85
constexpr size_t Size() const
Definition: linalg.h:88
I end
Definition: linalg.h:87
I beg
Definition: linalg.h:86