xgboost
linalg.h
Go to the documentation of this file.
1 
7 #ifndef XGBOOST_LINALG_H_
8 #define XGBOOST_LINALG_H_
9 
10 #include <dmlc/endian.h>
11 #include <xgboost/base.h>
12 #include <xgboost/context.h>
14 #include <xgboost/json.h>
15 #include <xgboost/span.h>
16 
17 #include <algorithm>
18 #include <cassert>
19 #include <cstddef> // for size_t
20 #include <cstdint> // for int32_t
21 #include <limits>
22 #include <string>
23 #include <tuple> // for make_tuple
24 #include <type_traits>
25 #include <utility>
26 #include <vector>
27 
28 #if defined(_MSC_VER)
29 #include <intrin.h>
30 #endif // defined(_MSC_VER)
31 
32 // decouple it from xgboost.
33 #ifndef LINALG_HD
34 #if defined(__CUDA__) || defined(__NVCC__)
35 #define LINALG_HD __host__ __device__
36 #else
37 #define LINALG_HD
38 #endif // defined (__CUDA__) || defined(__NVCC__)
39 #endif // LINALG_HD
40 
41 namespace xgboost::linalg {
42 namespace detail {
43 
45  template <typename T>
46  static constexpr char TypeChar() {
47  return (std::is_floating_point_v<T>
48  ? 'f'
49  : (std::is_integral_v<T> ? (std::is_signed_v<T> ? 'i' : 'u') : '\0'));
50  }
51 };
52 
53 template <size_t dim, typename S, typename Head, size_t D>
54 constexpr size_t Offset(S (&strides)[D], size_t n, Head head) {
55  static_assert(dim < D);
56  return n + head * strides[dim];
57 }
58 
59 template <size_t dim, typename S, size_t D, typename Head, typename... Tail>
60 constexpr std::enable_if_t<sizeof...(Tail) != 0, size_t> Offset(S (&strides)[D], size_t n,
61  Head head, Tail &&...rest) {
62  static_assert(dim < D);
63  return Offset<dim + 1>(strides, n + (head * strides[dim]), std::forward<Tail>(rest)...);
64 }
65 
66 template <int32_t D, bool f_array = false>
67 constexpr void CalcStride(size_t const (&shape)[D], size_t (&stride)[D]) {
68  if (f_array) {
69  stride[0] = 1;
70  for (int32_t s = 1; s < D; ++s) {
71  stride[s] = shape[s - 1] * stride[s - 1];
72  }
73  } else {
74  stride[D - 1] = 1;
75  for (int32_t s = D - 2; s >= 0; --s) {
76  stride[s] = shape[s + 1] * stride[s + 1];
77  }
78  }
79 }
80 
81 struct AllTag {};
82 
83 struct IntTag {};
84 
85 template <typename I>
86 struct RangeTag {
87  I beg;
88  I end;
89  [[nodiscard]] constexpr size_t Size() const { return end - beg; }
90 };
91 
95 template <typename T>
96 constexpr int32_t CalcSliceDim() {
97  return std::is_same_v<T, IntTag> ? 0 : 1;
98 }
99 
100 template <typename T, typename... S>
101 constexpr std::enable_if_t<sizeof...(S) != 0, int32_t> CalcSliceDim() {
102  return CalcSliceDim<T>() + CalcSliceDim<S...>();
103 }
104 
105 template <int32_t D>
106 constexpr size_t CalcSize(size_t (&shape)[D]) {
107  size_t size = 1;
108  for (auto d : shape) {
109  size *= d;
110  }
111  return size;
112 }
113 
114 template <typename S>
115 using RemoveCRType = std::remove_const_t<std::remove_reference_t<S>>;
116 
117 template <typename S>
118 using IndexToTag = std::conditional_t<std::is_integral_v<RemoveCRType<S>>, IntTag, S>;
119 
120 template <int32_t n, typename Fn>
121 LINALG_HD constexpr auto UnrollLoop(Fn fn) {
122 #if defined __CUDA_ARCH__
123 #pragma unroll n
124 #endif // defined __CUDA_ARCH__
125  for (int32_t i = 0; i < n; ++i) {
126  fn(i);
127  }
128 }
129 
130 template <typename T>
131 int32_t NativePopc(T v) {
132  int c = 0;
133  for (; v != 0; v &= v - 1) c++;
134  return c;
135 }
136 
137 inline LINALG_HD int Popc(uint32_t v) {
138 #if defined(__CUDA_ARCH__)
139  return __popc(v);
140 #elif defined(__GNUC__) || defined(__clang__)
141  return __builtin_popcount(v);
142 #elif defined(_MSC_VER)
143  return __popcnt(v);
144 #else
145  return NativePopc(v);
146 #endif // compiler
147 }
148 
149 inline LINALG_HD int Popc(uint64_t v) {
150 #if defined(__CUDA_ARCH__)
151  return __popcll(v);
152 #elif defined(__GNUC__) || defined(__clang__)
153  return __builtin_popcountll(v);
154 #elif defined(_MSC_VER) && defined(_M_X64)
155  return __popcnt64(v);
156 #else
157  return NativePopc(v);
158 #endif // compiler
159 }
160 
161 template <std::size_t D, typename Head>
162 LINALG_HD void IndexToArr(std::size_t (&arr)[D], Head head) {
163  static_assert(std::is_integral_v<std::remove_reference_t<Head>>, "Invalid index type.");
164  arr[D - 1] = head;
165 }
166 
170 template <std::size_t D, typename Head, typename... Rest>
171 LINALG_HD void IndexToArr(std::size_t (&arr)[D], Head head, Rest &&...index) {
172  static_assert(sizeof...(Rest) < D, "Index overflow.");
173  static_assert(std::is_integral_v<std::remove_reference_t<Head>>, "Invalid index type.");
174  arr[D - sizeof...(Rest) - 1] = head;
175  IndexToArr(arr, std::forward<Rest>(index)...);
176 }
177 
178 template <class T, std::size_t N, std::size_t... Idx>
179 constexpr auto ArrToTuple(T (&arr)[N], std::index_sequence<Idx...>) {
180  return std::make_tuple(arr[Idx]...);
181 }
182 
186 template <class T, std::size_t N>
187 constexpr auto ArrToTuple(T (&arr)[N]) {
188  return ArrToTuple(arr, std::make_index_sequence<N>{});
189 }
190 
191 // uint division optimization inspired by the CIndexer in cupy. Division operation is
192 // slow on both CPU and GPU, especially 64 bit integer. So here we first try to avoid 64
193 // bit when the index is smaller, then try to avoid division when it's exp of 2.
194 template <typename I, std::int32_t D>
196  std::size_t index[D]{0};
197  static_assert(std::is_signed_v<decltype(D)>,
198  "Don't change the type without changing the for loop.");
199  auto const sptr = shape.data();
200  for (int32_t dim = D; --dim > 0;) {
201  auto s = static_cast<std::remove_const_t<std::remove_reference_t<I>>>(sptr[dim]);
202  if (s & (s - 1)) {
203  auto t = idx / s;
204  index[dim] = idx - t * s;
205  idx = t;
206  } else { // exp of 2
207  index[dim] = idx & (s - 1);
208  idx >>= Popc(s - 1);
209  }
210  }
211  index[0] = idx;
212  return ArrToTuple(index);
213 }
214 
215 template <size_t dim, typename I, int32_t D>
216 void ReshapeImpl(size_t (&out_shape)[D], I s) {
217  static_assert(dim < D);
218  out_shape[dim] = s;
219 }
220 
221 template <size_t dim, int32_t D, typename... S, typename I,
222  std::enable_if_t<sizeof...(S) != 0> * = nullptr>
223 void ReshapeImpl(size_t (&out_shape)[D], I &&s, S &&...rest) {
224  static_assert(dim < D);
225  out_shape[dim] = s;
226  ReshapeImpl<dim + 1>(out_shape, std::forward<S>(rest)...);
227 }
228 
232 template <class...>
233 struct Conjunction : std::true_type {};
234 template <class B1>
235 struct Conjunction<B1> : B1 {};
236 template <class B1, class... Bn>
237 struct Conjunction<B1, Bn...>
238  : std::conditional_t<static_cast<bool>(B1::value), Conjunction<Bn...>, B1> {};
239 
240 template <typename... Index>
242 
243 template <typename... Index>
244 using EnableIfIntegral = std::enable_if_t<IsAllIntegral<Index...>::value>;
245 } // namespace detail
246 
250 constexpr detail::AllTag All() { return {}; }
254 template <typename I>
255 constexpr detail::RangeTag<I> Range(I beg, I end) {
256  return {beg, end};
257 }
258 
259 enum Order : std::uint8_t {
260  kC, // Row major
261  kF, // Col major
262 };
263 
277 template <typename T, std::int32_t kDim>
278 class TensorView {
279  public:
280  using ShapeT = std::size_t[kDim];
281  using StrideT = ShapeT;
282  using SizeType = std::size_t;
283 
284  using element_type = T; // NOLINT
285  using value_type = std::remove_cv_t<T>; // NOLINT
286 
287  private:
288  StrideT stride_{1};
289  ShapeT shape_{0};
290  common::Span<T> data_;
291  T *ptr_{nullptr}; // pointer of data_ to avoid bound check.
292 
293  SizeType size_{0};
294  DeviceOrd device_;
295 
296  // Unlike `Tensor`, the data_ can have arbitrary size since this is just a view.
297  LINALG_HD void CalcSize() {
298  if (data_.empty()) {
299  size_ = 0;
300  } else {
301  size_ = detail::CalcSize(shape_);
302  }
303  }
304 
305  template <size_t old_dim, size_t new_dim, std::int32_t D, typename I>
306  LINALG_HD SizeType MakeSliceDim(std::size_t new_shape[D], std::size_t new_stride[D],
307  detail::RangeTag<I> &&range) const {
308  static_assert(new_dim < D);
309  static_assert(old_dim < kDim);
310  new_stride[new_dim] = stride_[old_dim];
311  new_shape[new_dim] = range.Size();
312  assert(static_cast<decltype(shape_[old_dim])>(range.end) <= shape_[old_dim]);
313 
314  auto offset = stride_[old_dim] * range.beg;
315  return offset;
316  }
320  template <size_t old_dim, size_t new_dim, int32_t D, typename I, typename... S>
321  LINALG_HD SizeType MakeSliceDim(size_t new_shape[D], size_t new_stride[D],
322  detail::RangeTag<I> &&range, S &&...slices) const {
323  static_assert(new_dim < D);
324  static_assert(old_dim < kDim);
325  new_stride[new_dim] = stride_[old_dim];
326  new_shape[new_dim] = range.Size();
327  assert(static_cast<decltype(shape_[old_dim])>(range.end) <= shape_[old_dim]);
328 
329  auto offset = stride_[old_dim] * range.beg;
330  return MakeSliceDim<old_dim + 1, new_dim + 1, D>(new_shape, new_stride,
331  std::forward<S>(slices)...) +
332  offset;
333  }
334 
335  template <size_t old_dim, size_t new_dim, int32_t D>
336  LINALG_HD SizeType MakeSliceDim(size_t new_shape[D], size_t new_stride[D], detail::AllTag) const {
337  static_assert(new_dim < D);
338  static_assert(old_dim < kDim);
339  new_stride[new_dim] = stride_[old_dim];
340  new_shape[new_dim] = shape_[old_dim];
341  return 0;
342  }
346  template <size_t old_dim, size_t new_dim, int32_t D, typename... S>
347  LINALG_HD SizeType MakeSliceDim(size_t new_shape[D], size_t new_stride[D], detail::AllTag,
348  S &&...slices) const {
349  static_assert(new_dim < D);
350  static_assert(old_dim < kDim);
351  new_stride[new_dim] = stride_[old_dim];
352  new_shape[new_dim] = shape_[old_dim];
353  return MakeSliceDim<old_dim + 1, new_dim + 1, D>(new_shape, new_stride,
354  std::forward<S>(slices)...);
355  }
356 
357  template <size_t old_dim, size_t new_dim, int32_t D, typename Index>
358  LINALG_HD SizeType MakeSliceDim([[maybe_unused]] size_t new_shape[D],
359  [[maybe_unused]] size_t new_stride[D], Index i) const {
360  static_assert(old_dim < kDim);
361  return stride_[old_dim] * i;
362  }
366  template <size_t old_dim, size_t new_dim, int32_t D, typename Index, typename... S>
367  LINALG_HD std::enable_if_t<std::is_integral_v<Index>, size_t> MakeSliceDim(size_t new_shape[D],
368  size_t new_stride[D],
369  Index i,
370  S &&...slices) const {
371  static_assert(old_dim < kDim);
372  auto offset = stride_[old_dim] * i;
373  auto res =
374  MakeSliceDim<old_dim + 1, new_dim, D>(new_shape, new_stride, std::forward<S>(slices)...);
375  return res + offset;
376  }
377 
378  public:
379  size_t constexpr static kValueSize = sizeof(T);
380  size_t constexpr static kDimension = kDim;
381 
382  public:
394  template <typename I, std::int32_t D>
395  LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], DeviceOrd device)
396  : TensorView{data, shape, device, Order::kC} {}
397 
398  template <typename I, int32_t D>
399  LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], DeviceOrd device, Order order)
400  : data_{data}, ptr_{data_.data()}, device_{device} {
401  static_assert(D > 0 && D <= kDim, "Invalid shape.");
402  // shape
403  detail::UnrollLoop<D>([&](auto i) { shape_[i] = shape[i]; });
404  for (auto i = D; i < kDim; ++i) {
405  shape_[i] = 1;
406  }
407  // stride
408  switch (order) {
409  case Order::kC: {
410  detail::CalcStride(shape_, stride_);
411  break;
412  }
413  case Order::kF: {
414  detail::CalcStride<kDim, true>(shape_, stride_);
415  break;
416  }
417  default: {
418  SPAN_CHECK(false);
419  }
420  }
421  // size
422  this->CalcSize();
423  }
424 
429  template <typename I, std::int32_t D>
430  LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], I const (&stride)[D],
431  DeviceOrd device)
432  : data_{data}, ptr_{data_.data()}, device_{device} {
433  static_assert(D == kDim, "Invalid shape & stride.");
434  detail::UnrollLoop<D>([&](auto i) {
435  shape_[i] = shape[i];
436  stride_[i] = stride[i];
437  });
438  this->CalcSize();
439  }
440 
441  template <
442  typename U,
443  std::enable_if_t<common::detail::IsAllowedElementTypeConversion<U, T>::value> * = nullptr>
444  LINALG_HD TensorView(TensorView<U, kDim> const &that) // NOLINT
445  : data_{that.Values()}, ptr_{data_.data()}, size_{that.Size()}, device_{that.Device()} {
446  detail::UnrollLoop<kDim>([&](auto i) {
447  stride_[i] = that.Stride(i);
448  shape_[i] = that.Shape(i);
449  });
450  }
451 
465  template <typename... Index, detail::EnableIfIntegral<Index...> * = nullptr>
466  LINALG_HD T &operator()(Index &&...index) {
467  static_assert(sizeof...(index) <= kDim, "Invalid index.");
468  size_t offset = detail::Offset<0ul>(stride_, 0ul, std::forward<Index>(index)...);
469  assert(offset < data_.size() && "Out of bound access.");
470  return ptr_[offset];
471  }
475  template <typename... Index, detail::EnableIfIntegral<Index...> * = nullptr>
476  LINALG_HD T const &operator()(Index &&...index) const {
477  static_assert(sizeof...(index) <= kDim, "Invalid index.");
478  size_t offset = detail::Offset<0ul>(stride_, 0ul, std::forward<Index>(index)...);
479  assert(offset < data_.size() && "Out of bound access.");
480  return ptr_[offset];
481  }
482 
496  template <typename... S>
497  LINALG_HD auto Slice(S &&...slices) const {
498  static_assert(sizeof...(slices) <= kDim, "Invalid slice.");
499  int32_t constexpr kNewDim{detail::CalcSliceDim<detail::IndexToTag<S>...>()};
500  size_t new_shape[kNewDim];
501  size_t new_stride[kNewDim];
502  auto offset = MakeSliceDim<0, 0, kNewDim>(new_shape, new_stride, std::forward<S>(slices)...);
503  // ret is a different type due to changed dimension, so we can not access its private
504  // fields.
505  TensorView<T, kNewDim> ret{data_.subspan(data_.empty() ? 0 : offset), new_shape, new_stride,
506  device_};
507  return ret;
508  }
509 
510  LINALG_HD auto Shape() const { return common::Span<size_t const, kDim>{shape_}; }
514  LINALG_HD auto Shape(size_t i) const { return shape_[i]; }
515  LINALG_HD auto Stride() const { return common::Span<size_t const, kDim>{stride_}; }
519  LINALG_HD auto Stride(size_t i) const { return stride_[i]; }
520 
524  [[nodiscard]] LINALG_HD std::size_t Size() const { return size_; }
525  [[nodiscard]] bool Empty() const { return Size() == 0; }
529  [[nodiscard]] LINALG_HD bool Contiguous() const {
530  return data_.size() == this->Size() || this->CContiguous() || this->FContiguous();
531  }
535  [[nodiscard]] LINALG_HD bool CContiguous() const {
536  StrideT stride;
537  static_assert(std::is_same_v<decltype(stride), decltype(stride_)>);
538  // It's contiguous if the stride can be calculated from shape.
539  detail::CalcStride(shape_, stride);
541  }
545  [[nodiscard]] LINALG_HD bool FContiguous() const {
546  StrideT stride;
547  static_assert(std::is_same_v<decltype(stride), decltype(stride_)>);
548  // It's contiguous if the stride can be calculated from shape.
549  detail::CalcStride<kDim, true>(shape_, stride);
551  }
555  LINALG_HD auto Values() const -> decltype(data_) const & { return data_; }
559  LINALG_HD auto Device() const { return device_; }
560 };
561 
565 template <typename Container, typename... S,
566  std::enable_if_t<!common::detail::IsSpan<Container>::value &&
567  !std::is_pointer_v<Container>> * = nullptr>
568 auto MakeTensorView(Context const *ctx, Container &data, S &&...shape) { // NOLINT
569  using T = std::conditional_t<std::is_const_v<Container>,
570  std::add_const_t<typename Container::value_type>,
571  typename Container::value_type>;
572  std::size_t in_shape[sizeof...(S)];
573  detail::IndexToArr(in_shape, std::forward<S>(shape)...);
574  return TensorView<T, sizeof...(S)>{data, in_shape, ctx->Device()};
575 }
576 
577 template <typename T, decltype(common::dynamic_extent) ext, typename... S>
578 LINALG_HD auto MakeTensorView(DeviceOrd device, common::Span<T, ext> data, S &&...shape) {
579  std::size_t in_shape[sizeof...(S)];
580  detail::IndexToArr(in_shape, std::forward<S>(shape)...);
581  return TensorView<T, sizeof...(S)>{data, in_shape, device};
582 }
583 
584 template <typename T, decltype(common::dynamic_extent) ext, typename... S>
585 auto MakeTensorView(Context const *ctx, common::Span<T, ext> data, S &&...shape) {
586  return MakeTensorView(ctx->Device(), data, std::forward<S>(shape)...);
587 }
588 
589 template <typename T, decltype(common::dynamic_extent) ext, typename... S>
590 auto MakeTensorView(Context const *ctx, Order order, common::Span<T, ext> data, S &&...shape) {
591  std::size_t in_shape[sizeof...(S)];
592  detail::IndexToArr(in_shape, std::forward<S>(shape)...);
593  return TensorView<T, sizeof...(S)>{data, in_shape, ctx->Device(), order};
594 }
595 
596 template <typename T, typename... S>
597 auto MakeTensorView(Context const *ctx, HostDeviceVector<T> *data, S &&...shape) {
598  auto span = ctx->IsCPU() ? data->HostSpan() : data->DeviceSpan();
599  return MakeTensorView(ctx->Device(), span, std::forward<S>(shape)...);
600 }
601 
602 template <typename T, typename... S>
603 auto MakeTensorView(Context const *ctx, HostDeviceVector<T> const *data, S &&...shape) {
604  auto span = ctx->IsCPU() ? data->ConstHostSpan() : data->ConstDeviceSpan();
605  return MakeTensorView(ctx->Device(), span, std::forward<S>(shape)...);
606 }
607 
611 template <size_t D>
613  if (idx > std::numeric_limits<uint32_t>::max()) {
614  return detail::UnravelImpl<uint64_t, D>(static_cast<uint64_t>(idx), shape);
615  } else {
616  return detail::UnravelImpl<uint32_t, D>(static_cast<uint32_t>(idx), shape);
617  }
618 }
619 
620 template <size_t D>
621 LINALG_HD auto UnravelIndex(size_t idx, std::size_t const (&shape)[D]) {
623 }
624 
625 template <typename... S>
626 LINALG_HD auto UnravelIndex(std::size_t idx, S... shape) {
627  std::size_t s[sizeof...(S)];
628  detail::IndexToArr(s, shape...);
629  return UnravelIndex(idx, common::Span<std::size_t const, sizeof...(S)>(s));
630 }
631 
637 template <typename T>
639 
647 template <typename T>
648 auto MakeVec(T *ptr, size_t s, DeviceOrd device = DeviceOrd::CPU()) {
649  return linalg::TensorView<T, 1>{{ptr, s}, {s}, device};
650 }
651 
652 template <typename T>
654  return linalg::TensorView<T, 1>{s, {s.size()}, device};
655 }
656 
657 template <typename T>
658 auto MakeVec(std::vector<T> const &v) {
660  {v.data(), v.size()}, {v.size()}, DeviceOrd::CPU()};
661 }
662 
663 template <typename T>
665  return MakeVec(data->Device().IsCPU() ? data->HostPointer() : data->DevicePointer(), data->Size(),
666  data->Device());
667 }
668 
669 template <typename T>
670 auto MakeVec(HostDeviceVector<T> const *data) {
671  return MakeVec(data->Device().IsCPU() ? data->ConstHostPointer() : data->ConstDevicePointer(),
672  data->Size(), data->Device());
673 }
674 
680 template <typename T>
682 
689 template <typename T, std::int32_t D>
691  Json array_interface{Object{}};
692  array_interface["data"] = std::vector<Json>(2);
693  array_interface["data"][0] = Integer{reinterpret_cast<int64_t>(t.Values().data())};
694  array_interface["data"][1] = Boolean{true};
695  if (t.Device().IsCUDA()) {
696  // Change this once we have different CUDA stream.
697  array_interface["stream"] = Integer{2};
698  }
699  std::vector<Json> shape(t.Shape().size());
700  std::vector<Json> stride(t.Stride().size());
701  for (size_t i = 0; i < t.Shape().size(); ++i) {
702  shape[i] = Integer(t.Shape(i));
703  stride[i] = Integer(t.Stride(i) * sizeof(T));
704  }
705  array_interface["shape"] = Array{shape};
706  array_interface["strides"] = Array{stride};
707  array_interface["version"] = 3;
708 
709  char constexpr kT = detail::ArrayInterfaceHandler::TypeChar<T>();
710  static_assert(kT != '\0');
711  if (DMLC_LITTLE_ENDIAN) {
712  array_interface["typestr"] = String{"<" + (kT + std::to_string(sizeof(T)))};
713  } else {
714  array_interface["typestr"] = String{">" + (kT + std::to_string(sizeof(T)))};
715  }
716  return array_interface;
717 }
718 
722 template <typename T, int32_t D>
724  TensorView<T const, D> const &as_const = t;
725  auto res = ArrayInterface(as_const);
726  res["data"][1] = Boolean{false};
727  return res;
728 }
729 
733 template <typename T, int32_t D>
735  std::string str;
736  Json::Dump(ArrayInterface(t), &str);
737  return str;
738 }
739 
740 template <typename T, int32_t D>
742  std::string str;
743  Json::Dump(ArrayInterface(t), &str);
744  return str;
745 }
746 
747 template <typename T>
748 auto Make1dInterface(T const *vec, std::size_t len) {
749  Context ctx;
750  auto t = linalg::MakeTensorView(&ctx, common::Span{vec, len}, len);
751  auto str = linalg::ArrayInterfaceStr(t);
752  return str;
753 }
754 
759 template <typename T, int32_t kDim = 5>
760 class Tensor {
761  public:
762  using ShapeT = std::size_t[kDim];
763  using StrideT = ShapeT;
764 
765  private:
766  HostDeviceVector<T> data_;
767  ShapeT shape_{0};
768  Order order_{Order::kC};
769 
770  template <typename I, std::int32_t D>
771  void Initialize(I const (&shape)[D], DeviceOrd device) {
772  static_assert(D <= kDim, "Invalid shape.");
773  std::copy(shape, shape + D, shape_);
774  for (auto i = D; i < kDim; ++i) {
775  shape_[i] = 1;
776  }
777  if (!device.IsCPU()) {
778  data_.SetDevice(device);
779  data_.ConstDevicePointer(); // Pull to device;
780  }
781  CHECK_EQ(data_.Size(), detail::CalcSize(shape_));
782  }
783 
784  public:
785  Tensor() = default;
786 
793  template <typename I, int32_t D>
794  explicit Tensor(I const (&shape)[D], DeviceOrd device, Order order = kC)
795  : Tensor{common::Span<I const, D>{shape}, device, order} {}
796 
797  template <typename I, size_t D>
798  explicit Tensor(common::Span<I const, D> shape, DeviceOrd device, Order order = kC)
799  : order_{order} {
800  // No device unroll as this is a host only function.
801  std::copy(shape.data(), shape.data() + D, shape_);
802  for (auto i = D; i < kDim; ++i) {
803  shape_[i] = 1;
804  }
805  auto size = detail::CalcSize(shape_);
806  if (!device.IsCPU()) {
807  data_.SetDevice(device);
808  }
809  data_.Resize(size);
810  if (!device.IsCPU()) {
811  data_.DevicePointer(); // Pull to device
812  }
813  }
817  template <typename It, typename I, int32_t D>
818  explicit Tensor(It begin, It end, I const (&shape)[D], DeviceOrd device, Order order = kC)
819  : order_{order} {
820  auto &h_vec = data_.HostVector();
821  h_vec.insert(h_vec.begin(), begin, end);
822  // shape
823  this->Initialize(shape, device);
824  }
825 
826  template <typename I, int32_t D>
827  explicit Tensor(std::initializer_list<T> data, I const (&shape)[D], DeviceOrd device,
828  Order order = kC)
829  : order_{order} {
830  auto &h_vec = data_.HostVector();
831  h_vec = data;
832  // shape
833  this->Initialize(shape, device);
834  }
839  template <typename... Index>
840  T &operator()(Index &&...idx) {
841  return this->HostView()(std::forward<Index>(idx)...);
842  }
847  template <typename... Index>
848  T const &operator()(Index &&...idx) const {
849  return this->HostView()(std::forward<Index>(idx)...);
850  }
851 
855  auto View(DeviceOrd device) {
856  if (device.IsCPU()) {
857  auto span = data_.HostSpan();
858  return TensorView<T, kDim>{span, shape_, device, order_};
859  } else {
860  data_.SetDevice(device);
861  auto span = data_.DeviceSpan();
862  return TensorView<T, kDim>{span, shape_, device, order_};
863  }
864  }
865  auto View(DeviceOrd device) const {
866  if (device.IsCPU()) {
867  auto span = data_.ConstHostSpan();
868  return TensorView<T const, kDim>{span, shape_, device, order_};
869  } else {
870  data_.SetDevice(device);
871  auto span = data_.ConstDeviceSpan();
872  return TensorView<T const, kDim>{span, shape_, device, order_};
873  }
874  }
875 
876  auto HostView() { return this->View(DeviceOrd::CPU()); }
877  auto HostView() const { return this->View(DeviceOrd::CPU()); }
878 
879  [[nodiscard]] std::size_t Size() const { return data_.Size(); }
880  [[nodiscard]] bool Empty() const { return Size() == 0; }
881 
882  auto Shape() const { return common::Span<size_t const, kDim>{shape_}; }
883  auto Shape(size_t i) const { return shape_[i]; }
884 
885  HostDeviceVector<T> *Data() { return &data_; }
886  HostDeviceVector<T> const *Data() const { return &data_; }
887 
894  template <typename Fn>
895  void ModifyInplace(Fn &&fn) {
896  fn(this->Data(), common::Span<size_t, kDim>{this->shape_});
897  CHECK_EQ(this->Data()->Size(), detail::CalcSize(this->shape_))
898  << "Inconsistent size after modification.";
899  }
900 
906  template <typename... S, detail::EnableIfIntegral<S...> * = nullptr>
907  void Reshape(S &&...s) {
908  static_assert(sizeof...(S) <= kDim, "Invalid shape.");
909  detail::ReshapeImpl<0>(shape_, std::forward<S>(s)...);
910  auto constexpr kEnd = sizeof...(S);
911  static_assert(kEnd <= kDim, "Invalid shape.");
912  std::fill(shape_ + kEnd, shape_ + kDim, 1);
913  auto n = detail::CalcSize(shape_);
914  data_.Resize(n);
915  }
916 
922  template <size_t D>
924  static_assert(D <= kDim, "Invalid shape.");
925  std::copy(shape.data(), shape.data() + D, this->shape_);
926  std::fill(shape_ + D, shape_ + kDim, 1);
927  auto n = detail::CalcSize(shape_);
928  data_.Resize(n);
929  }
930 
931  template <size_t D>
932  void Reshape(size_t (&shape)[D]) {
933  this->Reshape(common::Span<size_t const, D>{shape});
934  }
938  template <typename... S>
939  auto Slice(S &&...slices) const {
940  return this->HostView().Slice(std::forward<S>(slices)...);
941  }
945  template <typename... S>
946  auto Slice(S &&...slices) {
947  return this->HostView().Slice(std::forward<S>(slices)...);
948  }
949 
953  void SetDevice(DeviceOrd device) const { data_.SetDevice(device); }
954  [[nodiscard]] DeviceOrd Device() const { return data_.Device(); }
955 };
956 
957 template <typename T>
959 
960 template <typename T>
962 
966 template <typename T, typename... Index>
967 auto Empty(Context const *ctx, Index &&...index) {
968  Tensor<T, sizeof...(Index)> t;
969  t.SetDevice(ctx->Device());
970  t.Reshape(index...);
971  return t;
972 }
973 
977 template <typename T, std::int32_t kDim>
978 auto EmptyLike(Context const *ctx, Tensor<T, kDim> const &in) {
979  Tensor<T, kDim> t;
980  t.SetDevice(ctx->Device());
981  t.Reshape(in.Shape());
982  return t;
983 }
984 
988 template <typename T, typename... Index>
989 auto Constant(Context const *ctx, T v, Index &&...index) {
990  Tensor<T, sizeof...(Index)> t;
991  t.SetDevice(ctx->Device());
992  t.Reshape(index...);
993  t.Data()->Fill(std::move(v));
994  return t;
995 }
996 
1000 template <typename T, typename... Index>
1001 auto Zeros(Context const *ctx, Index &&...index) {
1002  return Constant(ctx, static_cast<T>(0), index...);
1003 }
1004 
1005 // Only first axis is supported for now.
1006 template <typename T, int32_t D>
1007 void Stack(Tensor<T, D> *l, Tensor<T, D> const &r) {
1008  if (r.Device().IsCUDA()) {
1009  l->SetDevice(r.Device());
1010  }
1012  for (size_t i = 1; i < D; ++i) {
1013  if (shape[i] == 0) {
1014  shape[i] = r.Shape(i);
1015  } else {
1016  CHECK_EQ(shape[i], r.Shape(i));
1017  }
1018  }
1019  data->Extend(*r.Data());
1020  shape[0] = l->Shape(0) + r.Shape(0);
1021  });
1022 }
1023 
1027 template <typename T>
1029  std::size_t shape[2]{x.Shape(0), 1};
1030  std::size_t stride[2]{x.Stride(0), 1};
1031  return MatrixView<T>{x.Values(), shape, stride, x.Device()};
1032 }
1033 } // namespace xgboost::linalg
1034 
1035 #if defined(LINALG_HD)
1036 #undef LINALG_HD
1037 #endif // defined(LINALG_HD)
1038 #endif // XGBOOST_LINALG_H_
Defines configuration macros and basic types for xgboost.
Definition: host_device_vector.h:89
std::size_t Size() const
const T * ConstDevicePointer() const
void Extend(const HostDeviceVector< T > &other)
common::Span< T const > ConstHostSpan() const
Definition: host_device_vector.h:118
std::vector< T > & HostVector()
common::Span< const T > ConstDeviceSpan() const
T * HostPointer()
Definition: host_device_vector.h:115
common::Span< T > DeviceSpan()
common::Span< T > HostSpan()
Definition: host_device_vector.h:116
void SetDevice(DeviceOrd device) const
DeviceOrd Device() const
void Resize(std::size_t new_size)
const T * ConstHostPointer() const
Definition: host_device_vector.h:119
Definition: json.h:120
Describes both true and false.
Definition: json.h:354
Definition: json.h:299
Definition: json.h:223
Definition: json.h:93
Data structure representing JSON format.
Definition: json.h:396
static void Dump(Json json, std::string *out, std::ios::openmode mode=std::ios::out)
Encode the JSON object. Optional parameter mode for choosing between text and binary (ubjson) output.
span class implementation, based on ISO++20 span<T>. The interface should be the same.
Definition: span.h:435
constexpr XGBOOST_DEVICE pointer data() const __span_noexcept
Definition: span.h:554
XGBOOST_DEVICE auto subspan() const -> Span< element_type, detail::ExtentValue< Extent, Offset, Count >::value >
Definition: span.h:601
constexpr XGBOOST_DEVICE index_type size() const __span_noexcept
Definition: span.h:559
constexpr XGBOOST_DEVICE bool empty() const __span_noexcept
Definition: span.h:566
A tensor view with static type and dimension. It implements indexing and slicing.
Definition: linalg.h:278
LINALG_HD std::size_t Size() const
Number of items in the tensor.
Definition: linalg.h:524
LINALG_HD TensorView(common::Span< T > data, I const (&shape)[D], DeviceOrd device)
Create a tensor with data and shape.
Definition: linalg.h:395
std::remove_cv_t< T > value_type
Definition: linalg.h:285
T element_type
Definition: linalg.h:284
LINALG_HD bool CContiguous() const
Whether it's a c-contiguous array.
Definition: linalg.h:535
LINALG_HD auto Stride(size_t i) const
Definition: linalg.h:519
LINALG_HD TensorView(common::Span< T > data, I const (&shape)[D], DeviceOrd device, Order order)
Definition: linalg.h:399
LINALG_HD auto Shape() const
Definition: linalg.h:510
ShapeT StrideT
Definition: linalg.h:281
constexpr static size_t kDimension
Definition: linalg.h:380
LINALG_HD auto Stride() const
Definition: linalg.h:515
LINALG_HD auto Slice(S &&...slices) const
Slice the tensor. The returned tensor has inferred dim and shape. Scalar result is not supported.
Definition: linalg.h:497
LINALG_HD auto Values() const -> decltype(data_) const &
Obtain a reference to the raw data.
Definition: linalg.h:555
LINALG_HD bool Contiguous() const
Whether this is a contiguous array, both C and F contiguous returns true.
Definition: linalg.h:529
bool Empty() const
Definition: linalg.h:525
LINALG_HD T const & operator()(Index &&...index) const
Index the tensor to obtain a scalar value.
Definition: linalg.h:476
std::size_t SizeType
Definition: linalg.h:282
LINALG_HD TensorView(TensorView< U, kDim > const &that)
Definition: linalg.h:444
LINALG_HD T & operator()(Index &&...index)
Index the tensor to obtain a scalar value.
Definition: linalg.h:466
constexpr static size_t kValueSize
Definition: linalg.h:379
LINALG_HD bool FContiguous() const
Whether it's a f-contiguous array.
Definition: linalg.h:545
LINALG_HD auto Shape(size_t i) const
Definition: linalg.h:514
LINALG_HD TensorView(common::Span< T > data, I const (&shape)[D], I const (&stride)[D], DeviceOrd device)
Create a tensor with data, shape and strides. Don't use this constructor if stride can be calculated ...
Definition: linalg.h:430
std::size_t[kDim] ShapeT
Definition: linalg.h:280
LINALG_HD auto Device() const
Obtain the CUDA device ordinal.
Definition: linalg.h:559
A tensor storage. To use it for other functionality like slicing one needs to obtain a view first....
Definition: linalg.h:760
auto Slice(S &&...slices)
Get a host view on the slice.
Definition: linalg.h:946
bool Empty() const
Definition: linalg.h:880
Tensor(It begin, It end, I const (&shape)[D], DeviceOrd device, Order order=kC)
Definition: linalg.h:818
auto Slice(S &&...slices) const
Get a host view on the slice.
Definition: linalg.h:939
HostDeviceVector< T > const * Data() const
Definition: linalg.h:886
void Reshape(size_t(&shape)[D])
Definition: linalg.h:932
auto View(DeviceOrd device) const
Definition: linalg.h:865
auto HostView()
Definition: linalg.h:876
auto Shape(size_t i) const
Definition: linalg.h:883
HostDeviceVector< T > * Data()
Definition: linalg.h:885
T & operator()(Index &&...idx)
Index operator. Not thread safe, should not be used in performance critical region....
Definition: linalg.h:840
auto View(DeviceOrd device)
Get a TensorView for this tensor.
Definition: linalg.h:855
Tensor(common::Span< I const, D > shape, DeviceOrd device, Order order=kC)
Definition: linalg.h:798
auto Shape() const
Definition: linalg.h:882
void ModifyInplace(Fn &&fn)
Visitor function for modification that changes shape and data.
Definition: linalg.h:895
void SetDevice(DeviceOrd device) const
Set device ordinal for this tensor.
Definition: linalg.h:953
Tensor(std::initializer_list< T > data, I const (&shape)[D], DeviceOrd device, Order order=kC)
Definition: linalg.h:827
void Reshape(common::Span< size_t const, D > shape)
Reshape the tensor.
Definition: linalg.h:923
DeviceOrd Device() const
Definition: linalg.h:954
auto HostView() const
Definition: linalg.h:877
Tensor(I const (&shape)[D], DeviceOrd device, Order order=kC)
Create a tensor with shape and device ordinal. The storage is initialized automatically.
Definition: linalg.h:794
T const & operator()(Index &&...idx) const
Index operator. Not thread safe, should not be used in performance critical region....
Definition: linalg.h:848
std::size_t[kDim] ShapeT
Definition: linalg.h:762
void Reshape(S &&...s)
Reshape the tensor.
Definition: linalg.h:907
ShapeT StrideT
Definition: linalg.h:763
std::size_t Size() const
Definition: linalg.h:879
A device-and-host vector abstraction layer.
#define LINALG_HD
Definition: linalg.h:37
constexpr std::size_t dynamic_extent
Definition: span.h:150
Span(std::vector< T > const &) -> Span< T const >
std::conditional_t< std::is_integral_v< RemoveCRType< S > >, IntTag, S > IndexToTag
Definition: linalg.h:118
LINALG_HD auto UnravelImpl(I idx, common::Span< size_t const, D > shape)
Definition: linalg.h:195
void ReshapeImpl(size_t(&out_shape)[D], I s)
Definition: linalg.h:216
LINALG_HD int Popc(uint32_t v)
Definition: linalg.h:137
std::remove_const_t< std::remove_reference_t< S > > RemoveCRType
Definition: linalg.h:115
constexpr int32_t CalcSliceDim()
Calculate the dimension of sliced tensor.
Definition: linalg.h:96
constexpr LINALG_HD auto UnrollLoop(Fn fn)
Definition: linalg.h:121
constexpr size_t Offset(S(&strides)[D], size_t n, Head head)
Definition: linalg.h:54
LINALG_HD void IndexToArr(std::size_t(&arr)[D], Head head)
Definition: linalg.h:162
constexpr void CalcStride(size_t const (&shape)[D], size_t(&stride)[D])
Definition: linalg.h:67
constexpr auto ArrToTuple(T(&arr)[N], std::index_sequence< Idx... >)
Definition: linalg.h:179
int32_t NativePopc(T v)
Definition: linalg.h:131
std::enable_if_t< IsAllIntegral< Index... >::value > EnableIfIntegral
Definition: linalg.h:244
constexpr size_t CalcSize(size_t(&shape)[D])
Definition: linalg.h:106
Definition: linalg.h:41
constexpr detail::RangeTag< I > Range(I beg, I end)
Specify a range of elements in the axis for slicing.
Definition: linalg.h:255
auto Make1dInterface(T const *vec, std::size_t len)
Definition: linalg.h:748
auto EmptyLike(Context const *ctx, Tensor< T, kDim > const &in)
Create an array with the same shape and dtype as the input.
Definition: linalg.h:978
MatrixView< T > ExpandDim(VectorView< T > x)
Push an extra dim to the end.
Definition: linalg.h:1028
auto MakeTensorView(Context const *ctx, Container &data, S &&...shape)
Constructor for automatic type deduction.
Definition: linalg.h:568
auto ArrayInterfaceStr(TensorView< T const, D > const &t)
Return string representation of array interface.
Definition: linalg.h:734
auto MakeVec(T *ptr, size_t s, DeviceOrd device=DeviceOrd::CPU())
Create a vector view from contigious memory.
Definition: linalg.h:648
LINALG_HD auto UnravelIndex(size_t idx, common::Span< size_t const, D > shape)
Turns linear index into multi-dimension index. Similar to numpy unravel.
Definition: linalg.h:612
void Stack(Tensor< T, D > *l, Tensor< T, D > const &r)
Definition: linalg.h:1007
auto Constant(Context const *ctx, T v, Index &&...index)
Create an array with value v.
Definition: linalg.h:989
auto Zeros(Context const *ctx, Index &&...index)
Like np.zeros, return a new array of given shape and type, filled with zeros.
Definition: linalg.h:1001
auto Empty(Context const *ctx, Index &&...index)
Create an array without initialization.
Definition: linalg.h:967
constexpr detail::AllTag All()
Specify all elements in the axis for slicing.
Definition: linalg.h:250
Json ArrayInterface(TensorView< T const, D > const &t)
Array Interface defined by numpy.
Definition: linalg.h:690
Order
Definition: linalg.h:259
@ kC
Definition: linalg.h:260
@ kF
Definition: linalg.h:261
JsonInteger Integer
Definition: json.h:625
#define SPAN_CHECK(cond)
Definition: span.h:127
Runtime context for XGBoost. Contains information like threads and device.
Definition: context.h:142
DeviceOrd Device() const
Get the current device and ordinal.
Definition: context.h:207
bool IsCPU() const
Is XGBoost running on CPU?
Definition: context.h:182
A type for device ordinal. The type is packed into 32-bit for efficient use in viewing types like lin...
Definition: context.h:40
bool IsCUDA() const
Definition: context.h:55
bool IsCPU() const
Definition: context.h:56
constexpr static auto CPU()
Constructor for CPU.
Definition: context.h:73
Definition: linalg.h:81
static constexpr char TypeChar()
Definition: linalg.h:46
Definition: linalg.h:233
Definition: linalg.h:83
Definition: linalg.h:86
constexpr size_t Size() const
Definition: linalg.h:89
I end
Definition: linalg.h:88
I beg
Definition: linalg.h:87