xgboost
linalg.h
1 
6 #ifndef XGBOOST_LINALG_H_
7 #define XGBOOST_LINALG_H_
8 
9 #include <dmlc/endian.h>
10 #include <xgboost/base.h>
11 #include <xgboost/context.h>
12 #include <xgboost/host_device_vector.h>
13 #include <xgboost/json.h>
14 #include <xgboost/span.h>
15 
16 #include <algorithm>
17 #include <cassert>
18 #include <cinttypes> // for int32_t
19 #include <cstddef> // for size_t
20 #include <limits>
21 #include <string>
22 #include <tuple> // for make_tuple
23 #include <type_traits>
24 #include <utility>
25 #include <vector>
26 
27 #if defined(_MSC_VER)
28 #include <intrin.h>
29 #endif // defined(_MSC_VER)
30 
31 // decouple it from xgboost.
32 #ifndef LINALG_HD
33 #if defined(__CUDA__) || defined(__NVCC__)
34 #define LINALG_HD __host__ __device__
35 #else
36 #define LINALG_HD
37 #endif // defined (__CUDA__) || defined(__NVCC__)
38 #endif // LINALG_HD
39 
40 namespace xgboost::linalg {
41 namespace detail {
42 
43 struct ArrayInterfaceHandler {
44  template <typename T>
45  static constexpr char TypeChar() {
46  return (std::is_floating_point<T>::value
47  ? 'f'
48  : (std::is_integral<T>::value ? (std::is_signed<T>::value ? 'i' : 'u') : '\0'));
49  }
50 };
51 
52 template <size_t dim, typename S, typename Head, size_t D>
53 constexpr size_t Offset(S (&strides)[D], size_t n, Head head) {
54  static_assert(dim < D);
55  return n + head * strides[dim];
56 }
57 
58 template <size_t dim, typename S, size_t D, typename Head, typename... Tail>
59 constexpr std::enable_if_t<sizeof...(Tail) != 0, size_t> Offset(S (&strides)[D], size_t n,
60  Head head, Tail &&...rest) {
61  static_assert(dim < D);
62  return Offset<dim + 1>(strides, n + (head * strides[dim]), std::forward<Tail>(rest)...);
63 }
64 
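
For orientation, the two Offset overloads simply fold index times stride across the dimensions; with row-major strides {12, 4, 1} the element (1, 2, 3) lands at flat offset 1*12 + 2*4 + 3*1 = 23. A minimal sketch calling the internal helper directly (illustrative only; in normal use TensorView's operator() does this for you):

```cpp
#include <cassert>
#include <cstddef>
#include <xgboost/linalg.h>

void OffsetSketch() {
  std::size_t stride[3] = {12, 4, 1};  // row-major strides for shape {2, 3, 4}
  // Fold the indices (1, 2, 3) against the strides: 1*12 + 2*4 + 3*1 == 23.
  std::size_t off = xgboost::linalg::detail::Offset<0>(stride, 0, 1, 2, 3);
  assert(off == 23);
}
```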
65 template <int32_t D, bool f_array = false>
66 constexpr void CalcStride(size_t const (&shape)[D], size_t (&stride)[D]) {
67  if (f_array) {
68  stride[0] = 1;
69  for (int32_t s = 1; s < D; ++s) {
70  stride[s] = shape[s - 1] * stride[s - 1];
71  }
72  } else {
73  stride[D - 1] = 1;
74  for (int32_t s = D - 2; s >= 0; --s) {
75  stride[s] = shape[s + 1] * stride[s + 1];
76  }
77  }
78 }
79 
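
To illustrate the two layouts, assuming a shape of {2, 3, 4}: the C (row-major) branch produces strides {12, 4, 1}, while the F (column-major) branch produces {1, 2, 6}. A small sketch exercising the internal helper (for illustration only; TensorView calls it for you based on Order):

```cpp
#include <cassert>
#include <cstddef>
#include <xgboost/linalg.h>

void CalcStrideSketch() {
  std::size_t shape[3] = {2, 3, 4};
  std::size_t c_stride[3];
  std::size_t f_stride[3];
  xgboost::linalg::detail::CalcStride<3>(shape, c_stride);        // {12, 4, 1}
  xgboost::linalg::detail::CalcStride<3, true>(shape, f_stride);  // {1, 2, 6}
  assert(c_stride[0] == 12 && f_stride[2] == 6);
}
```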
80 struct AllTag {};
81 
82 struct IntTag {};
83 
84 template <typename I>
85 struct RangeTag {
86  I beg;
87  I end;
88  [[nodiscard]] constexpr size_t Size() const { return end - beg; }
89 };
90 
94 template <typename T>
95 constexpr int32_t CalcSliceDim() {
96  return std::is_same<T, IntTag>::value ? 0 : 1;
97 }
98 
99 template <typename T, typename... S>
100 constexpr std::enable_if_t<sizeof...(S) != 0, int32_t> CalcSliceDim() {
101  return CalcSliceDim<T>() + CalcSliceDim<S...>();
102 }
103 
104 template <int32_t D>
105 constexpr size_t CalcSize(size_t (&shape)[D]) {
106  size_t size = 1;
107  for (auto d : shape) {
108  size *= d;
109  }
110  return size;
111 }
112 
113 template <typename S>
114 using RemoveCRType = std::remove_const_t<std::remove_reference_t<S>>;
115 
116 template <typename S>
117 using IndexToTag = std::conditional_t<std::is_integral<RemoveCRType<S>>::value, IntTag, S>;
118 
119 template <int32_t n, typename Fn>
120 LINALG_HD constexpr auto UnrollLoop(Fn fn) {
121 #if defined __CUDA_ARCH__
122 #pragma unroll n
123 #endif // defined __CUDA_ARCH__
124  for (int32_t i = 0; i < n; ++i) {
125  fn(i);
126  }
127 }
128 
129 template <typename T>
130 int32_t NativePopc(T v) {
131  int c = 0;
132  for (; v != 0; v &= v - 1) c++;
133  return c;
134 }
135 
136 inline LINALG_HD int Popc(uint32_t v) {
137 #if defined(__CUDA_ARCH__)
138  return __popc(v);
139 #elif defined(__GNUC__) || defined(__clang__)
140  return __builtin_popcount(v);
141 #elif defined(_MSC_VER)
142  return __popcnt(v);
143 #else
144  return NativePopc(v);
145 #endif // compiler
146 }
147 
148 inline LINALG_HD int Popc(uint64_t v) {
149 #if defined(__CUDA_ARCH__)
150  return __popcll(v);
151 #elif defined(__GNUC__) || defined(__clang__)
152  return __builtin_popcountll(v);
153 #elif defined(_MSC_VER) && defined(_M_X64)
154  return __popcnt64(v);
155 #else
156  return NativePopc(v);
157 #endif // compiler
158 }
159 
160 template <std::size_t D, typename Head>
161 LINALG_HD void IndexToArr(std::size_t (&arr)[D], Head head) {
162  static_assert(std::is_integral<std::remove_reference_t<Head>>::value, "Invalid index type.");
163  arr[D - 1] = head;
164 }
165 
169 template <std::size_t D, typename Head, typename... Rest>
170 LINALG_HD void IndexToArr(std::size_t (&arr)[D], Head head, Rest &&...index) {
171  static_assert(sizeof...(Rest) < D, "Index overflow.");
172  static_assert(std::is_integral<std::remove_reference_t<Head>>::value, "Invalid index type.");
173  arr[D - sizeof...(Rest) - 1] = head;
174  IndexToArr(arr, std::forward<Rest>(index)...);
175 }
176 
177 template <class T, std::size_t N, std::size_t... Idx>
178 constexpr auto ArrToTuple(T (&arr)[N], std::index_sequence<Idx...>) {
179  return std::make_tuple(arr[Idx]...);
180 }
181 
185 template <class T, std::size_t N>
186 constexpr auto ArrToTuple(T (&arr)[N]) {
187  return ArrToTuple(arr, std::make_index_sequence<N>{});
188 }
189 
190 // uint division optimization inspired by the CIndexer in cupy. Division operation is
191 // slow on both CPU and GPU, especially 64 bit integer. So here we first try to avoid 64
192 // bit when the index is smaller, then try to avoid division when it's exp of 2.
193 template <typename I, int32_t D>
194 LINALG_HD auto UnravelImpl(I idx, common::Span<size_t const, D> shape) {
195  size_t index[D]{0};
196  static_assert(std::is_signed<decltype(D)>::value,
197  "Don't change the type without changing the for loop.");
198  for (int32_t dim = D; --dim > 0;) {
199  auto s = static_cast<std::remove_const_t<std::remove_reference_t<I>>>(shape[dim]);
200  if (s & (s - 1)) {
201  auto t = idx / s;
202  index[dim] = idx - t * s;
203  idx = t;
204  } else { // exp of 2
205  index[dim] = idx & (s - 1);
206  idx >>= Popc(s - 1);
207  }
208  }
209  index[0] = idx;
210  return ArrToTuple(index);
211 }
212 
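
To make the optimisation above concrete: when an axis extent s is a power of two, s & (s - 1) is zero, so the remainder and quotient are obtained with a mask and a shift instead of a division. A self-contained illustration (plain C++, not part of the library):

```cpp
#include <cassert>
#include <cstddef>

void UnravelByMaskSketch() {
  std::size_t idx = 13;
  std::size_t extent = 8;                  // power of two, so (extent - 1) is an all-ones mask
  std::size_t inner = idx & (extent - 1);  // 13 & 7 == 5, same as 13 % 8
  idx >>= 3;                               // Popc(7) == 3 bits, same as 13 / 8 == 1
  assert(inner == 5 && idx == 1);          // 13 unravels to (1, 5) for shape {4, 8}
}
```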
213 template <size_t dim, typename I, int32_t D>
214 void ReshapeImpl(size_t (&out_shape)[D], I s) {
215  static_assert(dim < D);
216  out_shape[dim] = s;
217 }
218 
219 template <size_t dim, int32_t D, typename... S, typename I,
220  std::enable_if_t<sizeof...(S) != 0> * = nullptr>
221 void ReshapeImpl(size_t (&out_shape)[D], I &&s, S &&...rest) {
222  static_assert(dim < D);
223  out_shape[dim] = s;
224  ReshapeImpl<dim + 1>(out_shape, std::forward<S>(rest)...);
225 }
226 
227 template <typename Fn, typename Tup, size_t... I>
228 LINALG_HD decltype(auto) constexpr Apply(Fn &&f, Tup &&t, std::index_sequence<I...>) {
229  return f(std::get<I>(t)...);
230 }
231 
238 template <typename Fn, typename Tup>
239 LINALG_HD decltype(auto) constexpr Apply(Fn &&f, Tup &&t) {
240  constexpr auto kSize = std::tuple_size<Tup>::value;
241  return Apply(std::forward<Fn>(f), std::forward<Tup>(t), std::make_index_sequence<kSize>{});
242 }
243 
247 template <class...>
248 struct Conjunction : std::true_type {};
249 template <class B1>
250 struct Conjunction<B1> : B1 {};
251 template <class B1, class... Bn>
252 struct Conjunction<B1, Bn...>
253  : std::conditional_t<static_cast<bool>(B1::value), Conjunction<Bn...>, B1> {};
254 
255 template <typename... Index>
256 using IsAllIntegral = Conjunction<std::is_integral<std::remove_reference_t<Index>>...>;
257 
258 template <typename... Index>
259 using EnableIfIntegral = std::enable_if_t<IsAllIntegral<Index...>::value>;
260 } // namespace detail
261 
265 constexpr detail::AllTag All() { return {}; }
269 template <typename I>
270 constexpr detail::RangeTag<I> Range(I beg, I end) {
271  return {beg, end};
272 }
273 
274 enum Order : std::uint8_t {
275  kC, // Row major
276  kF, // Col major
277 };
278 
292 template <typename T, int32_t kDim>
293 class TensorView {
294  public:
295  using ShapeT = size_t[kDim];
296  using StrideT = ShapeT;
297 
298  private:
299  StrideT stride_{1};
300  ShapeT shape_{0};
301  common::Span<T> data_;
302  T *ptr_{nullptr}; // pointer of data_ to avoid bound check.
303 
304  size_t size_{0};
305  int32_t device_{-1};
306 
307  // Unlike `Tensor`, the data_ can have arbitrary size since this is just a view.
308  LINALG_HD void CalcSize() {
309  if (data_.empty()) {
310  size_ = 0;
311  } else {
312  size_ = detail::CalcSize(shape_);
313  }
314  }
315 
316  template <size_t old_dim, size_t new_dim, int32_t D, typename I>
317  LINALG_HD size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D],
318  detail::RangeTag<I> &&range) const {
319  static_assert(new_dim < D);
320  static_assert(old_dim < kDim);
321  new_stride[new_dim] = stride_[old_dim];
322  new_shape[new_dim] = range.Size();
323  assert(static_cast<decltype(shape_[old_dim])>(range.end) <= shape_[old_dim]);
324 
325  auto offset = stride_[old_dim] * range.beg;
326  return offset;
327  }
331  template <size_t old_dim, size_t new_dim, int32_t D, typename I, typename... S>
332  LINALG_HD size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D],
333  detail::RangeTag<I> &&range, S &&...slices) const {
334  static_assert(new_dim < D);
335  static_assert(old_dim < kDim);
336  new_stride[new_dim] = stride_[old_dim];
337  new_shape[new_dim] = range.Size();
338  assert(static_cast<decltype(shape_[old_dim])>(range.end) <= shape_[old_dim]);
339 
340  auto offset = stride_[old_dim] * range.beg;
341  return MakeSliceDim<old_dim + 1, new_dim + 1, D>(new_shape, new_stride,
342  std::forward<S>(slices)...) +
343  offset;
344  }
345 
346  template <size_t old_dim, size_t new_dim, int32_t D>
347  LINALG_HD size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D], detail::AllTag) const {
348  static_assert(new_dim < D);
349  static_assert(old_dim < kDim);
350  new_stride[new_dim] = stride_[old_dim];
351  new_shape[new_dim] = shape_[old_dim];
352  return 0;
353  }
357  template <size_t old_dim, size_t new_dim, int32_t D, typename... S>
358  LINALG_HD size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D], detail::AllTag,
359  S &&...slices) const {
360  static_assert(new_dim < D);
361  static_assert(old_dim < kDim);
362  new_stride[new_dim] = stride_[old_dim];
363  new_shape[new_dim] = shape_[old_dim];
364  return MakeSliceDim<old_dim + 1, new_dim + 1, D>(new_shape, new_stride,
365  std::forward<S>(slices)...);
366  }
367 
368  template <size_t old_dim, size_t new_dim, int32_t D, typename Index>
369  LINALG_HD size_t MakeSliceDim(DMLC_ATTRIBUTE_UNUSED size_t new_shape[D],
370  DMLC_ATTRIBUTE_UNUSED size_t new_stride[D], Index i) const {
371  static_assert(old_dim < kDim);
372  return stride_[old_dim] * i;
373  }
377  template <size_t old_dim, size_t new_dim, int32_t D, typename Index, typename... S>
378  LINALG_HD std::enable_if_t<std::is_integral<Index>::value, size_t> MakeSliceDim(
379  size_t new_shape[D], size_t new_stride[D], Index i, S &&...slices) const {
380  static_assert(old_dim < kDim);
381  auto offset = stride_[old_dim] * i;
382  auto res =
383  MakeSliceDim<old_dim + 1, new_dim, D>(new_shape, new_stride, std::forward<S>(slices)...);
384  return res + offset;
385  }
386 
387  public:
388  size_t constexpr static kValueSize = sizeof(T);
389  size_t constexpr static kDimension = kDim;
390 
391  public:
403  template <typename I, int32_t D>
404  LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], std::int32_t device)
405  : TensorView{data, shape, device, Order::kC} {}
406 
407  template <typename I, int32_t D>
408  LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], std::int32_t device, Order order)
409  : data_{data}, ptr_{data_.data()}, device_{device} {
410  static_assert(D > 0 && D <= kDim, "Invalid shape.");
411  // shape
412  detail::UnrollLoop<D>([&](auto i) { shape_[i] = shape[i]; });
413  for (auto i = D; i < kDim; ++i) {
414  shape_[i] = 1;
415  }
416  // stride
417  switch (order) {
418  case Order::kC: {
419  detail::CalcStride(shape_, stride_);
420  break;
421  }
422  case Order::kF: {
423  detail::CalcStride<kDim, true>(shape_, stride_);
424  break;
425  }
426  default: {
427  SPAN_CHECK(false);
428  }
429  }
430  // size
431  this->CalcSize();
432  }
433 
438  template <typename I, std::int32_t D>
439  LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], I const (&stride)[D],
440  std::int32_t device)
441  : data_{data}, ptr_{data_.data()}, device_{device} {
442  static_assert(D == kDim, "Invalid shape & stride.");
443  detail::UnrollLoop<D>([&](auto i) {
444  shape_[i] = shape[i];
445  stride_[i] = stride[i];
446  });
447  this->CalcSize();
448  }
449 
450  template <
451  typename U,
452  std::enable_if_t<common::detail::IsAllowedElementTypeConversion<U, T>::value> * = nullptr>
453  LINALG_HD TensorView(TensorView<U, kDim> const &that) // NOLINT
454  : data_{that.Values()}, ptr_{data_.data()}, size_{that.Size()}, device_{that.DeviceIdx()} {
455  detail::UnrollLoop<kDim>([&](auto i) {
456  stride_[i] = that.Stride(i);
457  shape_[i] = that.Shape(i);
458  });
459  }
460 
474  template <typename... Index, detail::EnableIfIntegral<Index...> * = nullptr>
475  LINALG_HD T &operator()(Index &&...index) {
476  static_assert(sizeof...(index) <= kDim, "Invalid index.");
477  size_t offset = detail::Offset<0ul>(stride_, 0ul, std::forward<Index>(index)...);
478  assert(offset < data_.size() && "Out of bound access.");
479  return ptr_[offset];
480  }
484  template <typename... Index, detail::EnableIfIntegral<Index...> * = nullptr>
485  LINALG_HD T const &operator()(Index &&...index) const {
486  static_assert(sizeof...(index) <= kDim, "Invalid index.");
487  size_t offset = detail::Offset<0ul>(stride_, 0ul, std::forward<Index>(index)...);
488  assert(offset < data_.size() && "Out of bound access.");
489  return ptr_[offset];
490  }
491 
505  template <typename... S>
506  LINALG_HD auto Slice(S &&...slices) const {
507  static_assert(sizeof...(slices) <= kDim, "Invalid slice.");
508  int32_t constexpr kNewDim{detail::CalcSliceDim<detail::IndexToTag<S>...>()};
509  size_t new_shape[kNewDim];
510  size_t new_stride[kNewDim];
511  auto offset = MakeSliceDim<0, 0, kNewDim>(new_shape, new_stride, std::forward<S>(slices)...);
512  // ret is a different type due to changed dimension, so we can not access its private
513  // fields.
514  TensorView<T, kNewDim> ret{data_.subspan(data_.empty() ? 0 : offset), new_shape, new_stride,
515  device_};
516  return ret;
517  }
518 
519  LINALG_HD auto Shape() const { return common::Span<size_t const, kDim>{shape_}; }
523  LINALG_HD auto Shape(size_t i) const { return shape_[i]; }
524  LINALG_HD auto Stride() const { return common::Span<size_t const, kDim>{stride_}; }
528  LINALG_HD auto Stride(size_t i) const { return stride_[i]; }
529 
533  [[nodiscard]] LINALG_HD std::size_t Size() const { return size_; }
537  [[nodiscard]] LINALG_HD bool Contiguous() const {
538  return data_.size() == this->Size() || this->CContiguous() || this->FContiguous();
539  }
543  [[nodiscard]] LINALG_HD bool CContiguous() const {
544  StrideT stride;
545  static_assert(std::is_same<decltype(stride), decltype(stride_)>::value);
546  // It's contiguous if the stride can be calculated from shape.
547  detail::CalcStride(shape_, stride);
548  return common::Span<size_t const, kDim>{stride_} == common::Span<size_t const, kDim>{stride};
549  }
553  [[nodiscard]] LINALG_HD bool FContiguous() const {
554  StrideT stride;
555  static_assert(std::is_same<decltype(stride), decltype(stride_)>::value);
556  // It's contiguous if the stride can be calculated from shape.
557  detail::CalcStride<kDim, true>(shape_, stride);
558  return common::Span<size_t const, kDim>{stride_} == common::Span<size_t const, kDim>{stride};
559  }
563  LINALG_HD auto Values() const -> decltype(data_) const & { return data_; }
567  LINALG_HD auto DeviceIdx() const { return device_; }
568 };
569 
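
A hedged usage sketch of TensorView (the buffer, shape, and device ordinal below are made up for illustration; -1 denotes the host):

```cpp
#include <vector>
#include <xgboost/linalg.h>

void TensorViewSketch() {
  std::vector<float> storage(6, 0.0f);
  auto data = xgboost::common::Span<float>{storage.data(), storage.size()};
  // 2x3 row-major view over the host buffer.
  xgboost::linalg::TensorView<float, 2> view{data, {2, 3}, -1};
  view(1, 2) = 42.0f;                                // indexing
  auto row = view.Slice(1, xgboost::linalg::All());  // 1-d view over the second row
  (void)row;
}
```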
573 template <typename Container, typename... S,
574  std::enable_if_t<!common::detail::IsSpan<Container>::value &&
575  !std::is_pointer_v<Container>> * = nullptr>
576 auto MakeTensorView(Context const *ctx, Container &data, S &&...shape) { // NOLINT
577  using T = std::conditional_t<std::is_const_v<Container>,
578  std::add_const_t<typename Container::value_type>,
579  typename Container::value_type>;
580  std::size_t in_shape[sizeof...(S)];
581  detail::IndexToArr(in_shape, std::forward<S>(shape)...);
582  return TensorView<T, sizeof...(S)>{data, in_shape, ctx->gpu_id};
583 }
584 
585 template <typename T, typename... S>
586 LINALG_HD auto MakeTensorView(std::int32_t device, common::Span<T> data, S &&...shape) {
587  std::size_t in_shape[sizeof...(S)];
588  detail::IndexToArr(in_shape, std::forward<S>(shape)...);
589  return TensorView<T, sizeof...(S)>{data, in_shape, device};
590 }
591 
592 template <typename T, typename... S>
593 auto MakeTensorView(Context const *ctx, common::Span<T> data, S &&...shape) {
594  return MakeTensorView(ctx->gpu_id, data, std::forward<S>(shape)...);
595 }
596 
597 template <typename T, typename... S>
598 auto MakeTensorView(Context const *ctx, HostDeviceVector<T> *data, S &&...shape) {
599  auto span = ctx->IsCPU() ? data->HostSpan() : data->DeviceSpan();
600  return MakeTensorView(ctx->gpu_id, span, std::forward<S>(shape)...);
601 }
602 
603 template <typename T, typename... S>
604 auto MakeTensorView(Context const *ctx, HostDeviceVector<T> const *data, S &&...shape) {
605  auto span = ctx->IsCPU() ? data->ConstHostSpan() : data->ConstDeviceSpan();
606  return MakeTensorView(ctx->gpu_id, span, std::forward<S>(shape)...);
607 }
608 
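
A hedged sketch of the container overload; the Context pointer is assumed to be supplied by the caller (e.g. the current learner), and the container and shape are illustrative:

```cpp
#include <vector>
#include <xgboost/context.h>
#include <xgboost/linalg.h>

void MakeTensorViewSketch(xgboost::Context const *ctx) {
  std::vector<double> values(24, 0.0);
  // Element type and rank are deduced: a 2x3x4 view over the vector.
  auto view = xgboost::linalg::MakeTensorView(ctx, values, 2, 3, 4);
  static_assert(decltype(view)::kDimension == 3, "rank follows the number of shape arguments");
}
```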
612 template <size_t D>
613 LINALG_HD auto UnravelIndex(size_t idx, common::Span<size_t const, D> shape) {
614  if (idx > std::numeric_limits<uint32_t>::max()) {
615  return detail::UnravelImpl<uint64_t, D>(static_cast<uint64_t>(idx), shape);
616  } else {
617  return detail::UnravelImpl<uint32_t, D>(static_cast<uint32_t>(idx), shape);
618  }
619 }
620 
621 template <size_t D>
622 LINALG_HD auto UnravelIndex(size_t idx, std::size_t const (&shape)[D]) {
623  return UnravelIndex(idx, common::Span<std::size_t const, D>{shape});
624 }
625 
626 template <typename... S>
627 LINALG_HD auto UnravelIndex(std::size_t idx, S... shape) {
628  std::size_t s[sizeof...(S)];
629  detail::IndexToArr(s, shape...);
630  return UnravelIndex(idx, common::Span<std::size_t const, sizeof...(S)>(s));
631 }
632 
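
A short sketch of the variadic overload (shape and index are illustrative):

```cpp
#include <cstddef>
#include <xgboost/linalg.h>

void UnravelIndexSketch() {
  // Row-major shape (4, 8): flat index 13 corresponds to (1, 5).
  auto [i, j] = xgboost::linalg::UnravelIndex(std::size_t{13}, 4, 8);
  (void)i;  // 1
  (void)j;  // 5
}
```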
638 template <typename T>
639 using VectorView = TensorView<T, 1>;
640 
648 template <typename T>
649 auto MakeVec(T *ptr, size_t s, int32_t device = -1) {
650  return linalg::TensorView<T, 1>{{ptr, s}, {s}, device};
651 }
652 
653 template <typename T>
654 auto MakeVec(HostDeviceVector<T> *data) {
655  return MakeVec(data->DeviceIdx() == -1 ? data->HostPointer() : data->DevicePointer(),
656  data->Size(), data->DeviceIdx());
657 }
658 
659 template <typename T>
660 auto MakeVec(HostDeviceVector<T> const *data) {
661  return MakeVec(data->DeviceIdx() == -1 ? data->ConstHostPointer() : data->ConstDevicePointer(),
662  data->Size(), data->DeviceIdx());
663 }
664 
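
A hedged sketch of the pointer overload (buffer and length are illustrative; omitting the device argument means host memory):

```cpp
#include <xgboost/linalg.h>

void MakeVecSketch() {
  float buf[4] = {0.0f, 1.0f, 2.0f, 3.0f};
  auto vec = xgboost::linalg::MakeVec(buf, 4);  // TensorView<float, 1> over host memory
  vec(2) = 7.0f;
}
```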
670 template <typename T>
671 using MatrixView = TensorView<T, 2>;
672 
679 template <typename T, int32_t D>
680 Json ArrayInterface(TensorView<T const, D> const &t) {
681  Json array_interface{Object{}};
682  array_interface["data"] = std::vector<Json>(2);
683  array_interface["data"][0] = Integer{reinterpret_cast<int64_t>(t.Values().data())};
684  array_interface["data"][1] = Boolean{true};
685  if (t.DeviceIdx() >= 0) {
686  // Change this once we have different CUDA stream.
687  array_interface["stream"] = Null{};
688  }
689  std::vector<Json> shape(t.Shape().size());
690  std::vector<Json> stride(t.Stride().size());
691  for (size_t i = 0; i < t.Shape().size(); ++i) {
692  shape[i] = Integer(t.Shape(i));
693  stride[i] = Integer(t.Stride(i) * sizeof(T));
694  }
695  array_interface["shape"] = Array{shape};
696  array_interface["strides"] = Array{stride};
697  array_interface["version"] = 3;
698 
699  char constexpr kT = detail::ArrayInterfaceHandler::TypeChar<T>();
700  static_assert(kT != '\0');
701  if (DMLC_LITTLE_ENDIAN) {
702  array_interface["typestr"] = String{"<" + (kT + std::to_string(sizeof(T)))};
703  } else {
704  array_interface["typestr"] = String{">" + (kT + std::to_string(sizeof(T)))};
705  }
706  return array_interface;
707 }
708 
712 template <typename T, int32_t D>
713 Json ArrayInterface(TensorView<T, D> const &t) {
714  TensorView<T const, D> const &as_const = t;
715  auto res = ArrayInterface(as_const);
716  res["data"][1] = Boolean{false};
717  return res;
718 }
719 
723 template <typename T, int32_t D>
724 auto ArrayInterfaceStr(TensorView<T const, D> const &t) {
725  std::string str;
726  Json::Dump(ArrayInterface(t), &str);
727  return str;
728 }
729 
730 template <typename T, int32_t D>
731 auto ArrayInterfaceStr(TensorView<T, D> const &t) {
732  std::string str;
733  Json::Dump(ArrayInterface(t), &str);
734  return str;
735 }
736 
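
For reference, the generated JSON follows numpy's __array_interface__ convention (data pointer, shape, strides in bytes, typestr, version); a hedged sketch that serialises a small host view:

```cpp
#include <string>
#include <vector>
#include <xgboost/linalg.h>

std::string ArrayInterfaceSketch() {
  std::vector<float> storage(4, 0.0f);
  auto vec = xgboost::linalg::MakeVec(storage.data(), storage.size());
  // For float data the "typestr" field is "<f4" on a little-endian machine.
  return xgboost::linalg::ArrayInterfaceStr(vec);
}
```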
741 template <typename T, int32_t kDim = 5>
742 class Tensor {
743  public:
744  using ShapeT = size_t[kDim];
745  using StrideT = ShapeT;
746 
747  private:
748  HostDeviceVector<T> data_;
749  ShapeT shape_{0};
750  Order order_{Order::kC};
751 
752  template <typename I, std::int32_t D>
753  void Initialize(I const (&shape)[D], std::int32_t device) {
754  static_assert(D <= kDim, "Invalid shape.");
755  std::copy(shape, shape + D, shape_);
756  for (auto i = D; i < kDim; ++i) {
757  shape_[i] = 1;
758  }
759  if (device >= 0) {
760  data_.SetDevice(device);
761  data_.ConstDevicePointer(); // Pull to device;
762  }
763  CHECK_EQ(data_.Size(), detail::CalcSize(shape_));
764  }
765 
766  public:
767  Tensor() = default;
768 
775  template <typename I, int32_t D>
776  explicit Tensor(I const (&shape)[D], std::int32_t device, Order order = kC)
777  : Tensor{common::Span<I const, D>{shape}, device, order} {}
778 
779  template <typename I, size_t D>
780  explicit Tensor(common::Span<I const, D> shape, std::int32_t device, Order order = kC)
781  : order_{order} {
782  // No device unroll as this is a host only function.
783  std::copy(shape.data(), shape.data() + D, shape_);
784  for (auto i = D; i < kDim; ++i) {
785  shape_[i] = 1;
786  }
787  auto size = detail::CalcSize(shape_);
788  if (device >= 0) {
789  data_.SetDevice(device);
790  }
791  data_.Resize(size);
792  if (device >= 0) {
793  data_.DevicePointer(); // Pull to device
794  }
795  }
799  template <typename It, typename I, int32_t D>
800  explicit Tensor(It begin, It end, I const (&shape)[D], std::int32_t device, Order order = kC)
801  : order_{order} {
802  auto &h_vec = data_.HostVector();
803  h_vec.insert(h_vec.begin(), begin, end);
804  // shape
805  this->Initialize(shape, device);
806  }
807 
808  template <typename I, int32_t D>
809  explicit Tensor(std::initializer_list<T> data, I const (&shape)[D], std::int32_t device,
810  Order order = kC)
811  : order_{order} {
812  auto &h_vec = data_.HostVector();
813  h_vec = data;
814  // shape
815  this->Initialize(shape, device);
816  }
821  template <typename... Index>
822  T &operator()(Index &&...idx) {
823  return this->HostView()(std::forward<Index>(idx)...);
824  }
829  template <typename... Index>
830  T const &operator()(Index &&...idx) const {
831  return this->HostView()(std::forward<Index>(idx)...);
832  }
833 
837  TensorView<T, kDim> View(int32_t device) {
838  if (device >= 0) {
839  data_.SetDevice(device);
840  auto span = data_.DeviceSpan();
841  return {span, shape_, device, order_};
842  } else {
843  auto span = data_.HostSpan();
844  return {span, shape_, device, order_};
845  }
846  }
847  TensorView<T const, kDim> View(int32_t device) const {
848  if (device >= 0) {
849  data_.SetDevice(device);
850  auto span = data_.ConstDeviceSpan();
851  return {span, shape_, device, order_};
852  } else {
853  auto span = data_.ConstHostSpan();
854  return {span, shape_, device, order_};
855  }
856  }
857 
858  auto HostView() const { return this->View(-1); }
859  auto HostView() { return this->View(-1); }
860 
861  [[nodiscard]] size_t Size() const { return data_.Size(); }
862  auto Shape() const { return common::Span<size_t const, kDim>{shape_}; }
863  auto Shape(size_t i) const { return shape_[i]; }
864 
865  HostDeviceVector<T> *Data() { return &data_; }
866  HostDeviceVector<T> const *Data() const { return &data_; }
867 
874  template <typename Fn>
875  void ModifyInplace(Fn &&fn) {
876  fn(this->Data(), common::Span<size_t, kDim>{this->shape_});
877  CHECK_EQ(this->Data()->Size(), detail::CalcSize(this->shape_))
878  << "Inconsistent size after modification.";
879  }
880 
886  template <typename... S, detail::EnableIfIntegral<S...> * = nullptr>
887  void Reshape(S &&...s) {
888  static_assert(sizeof...(S) <= kDim, "Invalid shape.");
889  detail::ReshapeImpl<0>(shape_, std::forward<S>(s)...);
890  auto constexpr kEnd = sizeof...(S);
891  static_assert(kEnd <= kDim, "Invalid shape.");
892  std::fill(shape_ + kEnd, shape_ + kDim, 1);
893  auto n = detail::CalcSize(shape_);
894  data_.Resize(n);
895  }
896 
902  template <size_t D>
903  void Reshape(common::Span<size_t const, D> shape) {
904  static_assert(D <= kDim, "Invalid shape.");
905  std::copy(shape.data(), shape.data() + D, this->shape_);
906  std::fill(shape_ + D, shape_ + kDim, 1);
907  auto n = detail::CalcSize(shape_);
908  data_.Resize(n);
909  }
910 
911  template <size_t D>
912  void Reshape(size_t (&shape)[D]) {
913  this->Reshape(common::Span<size_t const, D>{shape});
914  }
918  template <typename... S>
919  auto Slice(S &&...slices) const {
920  return this->HostView().Slice(std::forward<S>(slices)...);
921  }
925  template <typename... S>
926  auto Slice(S &&...slices) {
927  return this->HostView().Slice(std::forward<S>(slices)...);
928  }
929 
933  void SetDevice(int32_t device) const { data_.SetDevice(device); }
934  [[nodiscard]] int32_t DeviceIdx() const { return data_.DeviceIdx(); }
935 };
936 
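
A minimal sketch of the owning Tensor (host only; shapes and values are illustrative): allocate, write through a host view, then reshape.

```cpp
#include <xgboost/linalg.h>

void TensorSketch() {
  xgboost::linalg::Tensor<float, 2> t{{2, 3}, -1};  // 2x3, stored on the host
  auto h_view = t.HostView();                       // TensorView<float, 2>
  h_view(0, 1) = 3.0f;
  t.Reshape(3, 2);                                  // same element count, new shape
}
```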
937 template <typename T>
938 using Matrix = Tensor<T, 2>;
939 
940 template <typename T>
941 using Vector = Tensor<T, 1>;
942 
946 template <typename T, typename... Index>
947 auto Empty(Context const *ctx, Index &&...index) {
948  Tensor<T, sizeof...(Index)> t;
949  t.SetDevice(ctx->gpu_id);
950  t.Reshape(index...);
951  return t;
952 }
953 
957 template <typename T, typename... Index>
958 auto Constant(Context const *ctx, T v, Index &&...index) {
959  Tensor<T, sizeof...(Index)> t;
960  t.SetDevice(ctx->gpu_id);
961  t.Reshape(index...);
962  t.Data()->Fill(std::move(v));
963  return t;
964 }
965 
969 template <typename T, typename... Index>
970 auto Zeros(Context const *ctx, Index &&...index) {
971  return Constant(ctx, static_cast<T>(0), index...);
972 }
973 
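
A hedged sketch of the factory helpers; the Context pointer is assumed to be supplied by the caller:

```cpp
#include <xgboost/context.h>
#include <xgboost/linalg.h>

void FactorySketch(xgboost::Context const *ctx) {
  auto zeros = xgboost::linalg::Zeros<float>(ctx, 2, 3);    // 2x3 tensor filled with 0.0f
  auto ones  = xgboost::linalg::Constant(ctx, 1.0f, 2, 3);  // 2x3 tensor filled with 1.0f
  auto vec   = xgboost::linalg::Empty<float>(ctx, 4);       // length-4 tensor, no explicit fill
  (void)zeros; (void)ones; (void)vec;
}
```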
974 // Only first axis is supported for now.
975 template <typename T, int32_t D>
976 void Stack(Tensor<T, D> *l, Tensor<T, D> const &r) {
977  if (r.DeviceIdx() >= 0) {
978  l->SetDevice(r.DeviceIdx());
979  }
980  l->ModifyInplace([&](HostDeviceVector<T> *data, common::Span<size_t, D> shape) {
981  for (size_t i = 1; i < D; ++i) {
982  if (shape[i] == 0) {
983  shape[i] = r.Shape(i);
984  } else {
985  CHECK_EQ(shape[i], r.Shape(i));
986  }
987  }
988  data->Extend(*r.Data());
989  shape[0] = l->Shape(0) + r.Shape(0);
990  });
991 }
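
Finally, a small sketch of Stack, which concatenates two tensors along the first axis (shapes are illustrative; the Context is assumed to come from the caller):

```cpp
#include <xgboost/context.h>
#include <xgboost/linalg.h>

void StackSketch(xgboost::Context const *ctx) {
  auto a = xgboost::linalg::Zeros<float>(ctx, 1, 3);
  auto b = xgboost::linalg::Zeros<float>(ctx, 2, 3);
  xgboost::linalg::Stack(&a, b);  // afterwards a.Shape(0) == 3 and a.Shape(1) == 3
}
```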
992 } // namespace xgboost::linalg
993 
994 #if defined(LINALG_HD)
995 #undef LINALG_HD
996 #endif // defined(LINALG_HD)
997 #endif // XGBOOST_LINALG_H_