6 #ifndef XGBOOST_LINALG_H_
7 #define XGBOOST_LINALG_H_
9 #include <dmlc/endian.h>
21 #include <type_traits>
27 #if defined(__CUDA__) || defined(__NVCC__)
28 #define LINALG_HD __host__ __device__
41 return (std::is_floating_point<T>::value
43 : (std::is_integral<T>::value ? (std::is_signed<T>::value ?
'i' :
'u') :
'\0'));
47 template <
size_t dim,
typename S,
typename Head,
size_t D>
48 constexpr
size_t Offset(S (&strides)[D],
size_t n, Head head) {
49 static_assert(dim < D,
"");
50 return n + head * strides[dim];
53 template <
size_t dim,
typename S,
size_t D,
typename Head,
typename... Tail>
54 constexpr std::enable_if_t<
sizeof...(Tail) != 0,
size_t>
Offset(S (&strides)[D],
size_t n,
55 Head head, Tail &&...rest) {
56 static_assert(dim < D,
"");
57 return Offset<dim + 1>(strides, n + (head * strides[dim]), std::forward<Tail>(rest)...);
60 template <
int32_t D,
bool f_array = false>
61 constexpr
void CalcStride(
size_t const (&shape)[D],
size_t (&stride)[D]) {
64 for (int32_t s = 1; s < D; ++s) {
65 stride[s] = shape[s - 1] * stride[s - 1];
69 for (int32_t s = D - 2; s >= 0; --s) {
70 stride[s] = shape[s + 1] * stride[s + 1];
91 return std::is_same<T, IntTag>::value ? 0 : 1;
94 template <
typename T,
typename... S>
95 constexpr std::enable_if_t<
sizeof...(S) != 0, int32_t>
CalcSliceDim() {
102 for (
auto d : shape) {
108 template <
typename S>
111 template <
typename S>
112 using IndexToTag = std::conditional_t<std::is_integral<RemoveCRType<S>>::value,
IntTag, S>;
114 template <
int32_t n,
typename Fn>
116 #if defined __CUDA_ARCH__
119 for (int32_t i = 0; i < n; ++i) {
124 template <
typename T>
127 for (; v != 0; v &= v - 1) c++;
132 #if defined(__CUDA_ARCH__)
134 #elif defined(__GNUC__) || defined(__clang__)
135 return __builtin_popcount(v);
136 #elif defined(_MSC_VER)
144 #if defined(__CUDA_ARCH__)
146 #elif defined(__GNUC__) || defined(__clang__)
147 return __builtin_popcountll(v);
148 #elif defined(_MSC_VER)
149 return __popcnt64(v);
155 template <
class T, std::size_t N, std::size_t... Idx>
156 constexpr
auto Arr2Tup(T (&arr)[N], std::index_sequence<Idx...>) {
157 return std::make_tuple(arr[Idx]...);
160 template <
class T, std::
size_t N>
162 return Arr2Tup(arr, std::make_index_sequence<N>{});
168 template <
typename I,
int32_t D>
171 static_assert(std::is_signed<decltype(D)>::value,
172 "Don't change the type without changing the for loop.");
173 for (int32_t dim = D; --dim > 0;) {
174 auto s =
static_cast<std::remove_const_t<std::remove_reference_t<I>
>>(shape[dim]);
177 index[dim] = idx - t * s;
180 index[dim] = idx & (s - 1);
188 template <
size_t dim,
typename I,
int32_t D>
190 static_assert(dim < D,
"");
194 template <
size_t dim, int32_t D,
typename... S,
typename I,
195 std::enable_if_t<
sizeof...(S) != 0> * =
nullptr>
197 static_assert(dim < D,
"");
199 ReshapeImpl<dim + 1>(out_shape, std::forward<S>(rest)...);
202 template <
typename Fn,
typename Tup,
size_t... I>
204 return f(std::get<I>(t)...);
213 template <
typename Fn,
typename Tup>
215 constexpr
auto kSize = std::tuple_size<Tup>::value;
216 return Apply(std::forward<Fn>(f), std::forward<Tup>(t), std::make_index_sequence<kSize>{});
226 template <
class B1,
class... Bn>
227 struct Conjunction<B1, Bn...> : std::conditional_t<bool(B1::value), Conjunction<Bn...>, B1> {};
229 template <
typename... Index>
232 template <
typename... Index>
243 template <
typename I>
261 template <
typename T,
int32_t kDim>
285 template <
size_t old_dim,
size_t new_dim,
int32_t D,
typename I>
286 LINALG_HD size_t MakeSliceDim(
size_t new_shape[D],
size_t new_stride[D],
287 detail::RangeTag<I> &&range)
const {
288 static_assert(new_dim < D,
"");
289 static_assert(old_dim < kDim,
"");
290 new_stride[new_dim] = stride_[old_dim];
291 new_shape[new_dim] = range.Size();
292 assert(
static_cast<decltype(shape_[old_dim])
>(range.end) <= shape_[old_dim]);
294 auto offset = stride_[old_dim] * range.beg;
300 template <
size_t old_dim,
size_t new_dim, int32_t D,
typename I,
typename... S>
301 LINALG_HD size_t MakeSliceDim(
size_t new_shape[D],
size_t new_stride[D],
302 detail::RangeTag<I> &&range, S &&...slices)
const {
303 static_assert(new_dim < D,
"");
304 static_assert(old_dim < kDim,
"");
305 new_stride[new_dim] = stride_[old_dim];
306 new_shape[new_dim] = range.Size();
307 assert(
static_cast<decltype(shape_[old_dim])
>(range.end) <= shape_[old_dim]);
309 auto offset = stride_[old_dim] * range.beg;
310 return MakeSliceDim<old_dim + 1, new_dim + 1, D>(new_shape, new_stride,
311 std::forward<S>(slices)...) +
315 template <
size_t old_dim,
size_t new_dim,
int32_t D>
316 LINALG_HD size_t MakeSliceDim(
size_t new_shape[D],
size_t new_stride[D], detail::AllTag)
const {
317 static_assert(new_dim < D,
"");
318 static_assert(old_dim < kDim,
"");
319 new_stride[new_dim] = stride_[old_dim];
320 new_shape[new_dim] = shape_[old_dim];
326 template <
size_t old_dim,
size_t new_dim, int32_t D,
typename... S>
327 LINALG_HD size_t MakeSliceDim(
size_t new_shape[D],
size_t new_stride[D], detail::AllTag,
328 S &&...slices)
const {
329 static_assert(new_dim < D,
"");
330 static_assert(old_dim < kDim,
"");
331 new_stride[new_dim] = stride_[old_dim];
332 new_shape[new_dim] = shape_[old_dim];
333 return MakeSliceDim<old_dim + 1, new_dim + 1, D>(new_shape, new_stride,
334 std::forward<S>(slices)...);
337 template <
size_t old_dim,
size_t new_dim,
int32_t D,
typename Index>
338 LINALG_HD size_t MakeSliceDim(DMLC_ATTRIBUTE_UNUSED
size_t new_shape[D],
339 DMLC_ATTRIBUTE_UNUSED
size_t new_stride[D], Index i)
const {
340 static_assert(old_dim < kDim,
"");
341 return stride_[old_dim] * i;
346 template <
size_t old_dim,
size_t new_dim, int32_t D,
typename Index,
typename... S>
347 LINALG_HD std::enable_if_t<std::is_integral<Index>::value,
size_t> MakeSliceDim(
348 size_t new_shape[D],
size_t new_stride[D], Index i, S &&...slices)
const {
349 static_assert(old_dim < kDim,
"");
350 auto offset = stride_[old_dim] * i;
352 MakeSliceDim<old_dim + 1, new_dim, D>(new_shape, new_stride, std::forward<S>(slices)...);
372 template <
typename I,
int32_t D>
374 : data_{data}, ptr_{data_.data()}, device_{device} {
375 static_assert(D > 0 && D <= kDim,
"Invalid shape.");
377 detail::UnrollLoop<D>([&](
auto i) { shape_[i] = shape[i]; });
378 for (
auto i = D; i < kDim; ++i) {
391 template <
typename I,
int32_t D>
394 : data_{data}, ptr_{data_.data()}, device_{device} {
395 static_assert(D == kDim,
"Invalid shape & stride.");
396 detail::UnrollLoop<D>([&](
auto i) {
397 shape_[i] = shape[i];
398 stride_[i] = stride[i];
405 std::enable_if_t<common::detail::IsAllowedElementTypeConversion<U, T>::value> * =
nullptr>
407 : data_{that.
Values()}, ptr_{data_.data()}, size_{that.
Size()}, device_{that.
DeviceIdx()} {
408 detail::UnrollLoop<kDim>([&](
auto i) {
409 stride_[i] = that.
Stride(i);
410 shape_[i] = that.
Shape(i);
429 static_assert(
sizeof...(index) <= kDim,
"Invalid index.");
430 size_t offset = detail::Offset<0ul>(stride_, 0ul, std::forward<Index>(index)...);
431 assert(offset < data_.
size() &&
"Out of bound access.");
439 static_assert(
sizeof...(index) <= kDim,
"Invalid index.");
440 size_t offset = detail::Offset<0ul>(stride_, 0ul, std::forward<Index>(index)...);
441 assert(offset < data_.
size() &&
"Out of bound access.");
458 template <
typename... S>
460 static_assert(
sizeof...(slices) <= kDim,
"Invalid slice.");
461 int32_t constexpr kNewDim{detail::CalcSliceDim<detail::IndexToTag<S>...>()};
462 size_t new_shape[kNewDim];
463 size_t new_stride[kNewDim];
464 auto offset = MakeSliceDim<0, 0, kNewDim>(new_shape, new_stride, std::forward<S>(slices)...);
498 static_assert(std::is_same<decltype(stride), decltype(stride_)>::value,
"");
508 static_assert(std::is_same<decltype(stride), decltype(stride_)>::value,
"");
510 detail::CalcStride<kDim, true>(shape_, stride);
526 template <
typename Container,
typename I, int32_t D,
527 std::enable_if_t<!common::detail::IsSpan<Container>::value> * =
nullptr>
529 using T =
typename Container::value_type;
533 template <
typename T,
typename I,
int32_t D>
543 if (idx > std::numeric_limits<uint32_t>::max()) {
544 return detail::UnravelImpl<uint64_t, D>(
static_cast<uint64_t
>(idx), shape);
546 return detail::UnravelImpl<uint32_t, D>(
static_cast<uint32_t
>(idx), shape);
555 template <
typename T>
565 template <
typename T>
566 auto MakeVec(T *ptr,
size_t s, int32_t device = -1) {
570 template <
typename T>
576 template <
typename T>
587 template <
typename T>
596 template <
typename T,
int32_t D>
599 array_interface[
"data"] = std::vector<Json>(2);
600 array_interface[
"data"][0] =
Integer{
reinterpret_cast<int64_t
>(t.
Values().data())};
601 array_interface[
"data"][1] =
Boolean{
true};
604 array_interface[
"stream"] =
Null{};
606 std::vector<Json> shape(t.
Shape().size());
607 std::vector<Json> stride(t.
Stride().size());
608 for (
size_t i = 0; i < t.
Shape().size(); ++i) {
612 array_interface[
"shape"] =
Array{shape};
613 array_interface[
"strides"] =
Array{stride};
614 array_interface[
"version"] = 3;
616 char constexpr kT = detail::ArrayInterfaceHandler::TypeChar<T>();
617 static_assert(kT !=
'\0',
"");
618 if (DMLC_LITTLE_ENDIAN) {
619 array_interface[
"typestr"] =
String{
"<" + (kT + std::to_string(
sizeof(T)))};
621 array_interface[
"typestr"] =
String{
">" + (kT + std::to_string(
sizeof(T)))};
623 return array_interface;
629 template <
typename T,
int32_t D>
633 res[
"data"][1] =
Boolean{
false};
640 template <
typename T,
int32_t D>
647 template <
typename T,
int32_t D>
658 template <
typename T,
int32_t kDim = 5>
668 template <
typename I, std::
int32_t D>
669 void Initialize(I
const (&shape)[D], std::int32_t device) {
670 static_assert(D <= kDim,
"Invalid shape.");
671 std::copy(shape, shape + D, shape_);
672 for (
auto i = D; i < kDim; ++i) {
691 template <
typename I,
int32_t D>
692 explicit Tensor(I
const (&shape)[D], int32_t device)
693 :
Tensor{common::Span<I const, D>{shape}, device} {}
695 template <
typename I,
size_t D>
698 std::copy(shape.
data(), shape.
data() + D, shape_);
699 for (
auto i = D; i < kDim; ++i) {
714 template <
typename It,
typename I,
int32_t D>
715 explicit Tensor(It begin, It end, I
const (&shape)[D], int32_t device) {
717 h_vec.insert(h_vec.begin(), begin, end);
719 this->Initialize(shape, device);
722 template <
typename I,
int32_t D>
723 explicit Tensor(std::initializer_list<T> data, I
const (&shape)[D],
728 this->Initialize(shape, device);
734 template <
typename... Index>
736 return this->HostView()(std::forward<Index>(idx)...);
742 template <
typename... Index>
744 return this->HostView()(std::forward<Index>(idx)...);
754 return {span, shape_, device};
757 return {span, shape_, device};
764 return {span, shape_, device};
767 return {span, shape_, device};
776 auto Shape(
size_t i)
const {
return shape_[i]; }
787 template <
typename Fn>
791 <<
"Inconsistent size after modification.";
801 static_assert(
sizeof...(S) <= kDim,
"Invalid shape.");
802 detail::ReshapeImpl<0>(shape_, std::forward<S>(s)...);
803 auto constexpr kEnd =
sizeof...(S);
804 static_assert(kEnd <= kDim,
"Invalid shape.");
805 std::fill(shape_ + kEnd, shape_ + kDim, 1);
817 static_assert(D <= kDim,
"Invalid shape.");
818 std::copy(shape.
data(), shape.
data() + D, this->shape_);
819 std::fill(shape_ + D, shape_ + kDim, 1);
837 template <
typename T,
int32_t D>
843 for (
size_t i = 1; i < D; ++i) {
845 shape[i] = r.
Shape(i);
847 CHECK_EQ(shape[i], r.
Shape(i));
857 #if defined(LINALG_HD)
defines configuration macros of xgboost.
Definition: host_device_vector.h:86
const T * ConstDevicePointer() const
void Extend(const HostDeviceVector< T > &other)
common::Span< T const > ConstHostSpan() const
Definition: host_device_vector.h:114
std::vector< T > & HostVector()
void Resize(size_t new_size, T v=T())
common::Span< const T > ConstDeviceSpan() const
T * HostPointer()
Definition: host_device_vector.h:111
void SetDevice(int device) const
common::Span< T > DeviceSpan()
common::Span< T > HostSpan()
Definition: host_device_vector.h:112
const T * ConstHostPointer() const
Definition: host_device_vector.h:115
Describes both true and false.
Definition: json.h:311
Data structure representing JSON format.
Definition: json.h:356
static void Dump(Json json, std::string *out, std::ios::openmode mode=std::ios::out)
Encode the JSON object. Optional parameter mode for choosing between text and binary (ubjson) output.
span class implementation, based on ISO++20 span<T>. The interface should be the same.
Definition: span.h:423
constexpr XGBOOST_DEVICE pointer data() const __span_noexcept
Definition: span.h:548
XGBOOST_DEVICE auto subspan() const -> Span< element_type, detail::ExtentValue< Extent, Offset, Count >::value >
Definition: span.h:595
constexpr XGBOOST_DEVICE index_type size() const __span_noexcept
Definition: span.h:553
constexpr XGBOOST_DEVICE bool empty() const __span_noexcept
Definition: span.h:560
A tensor view with static type and dimension. It implements indexing and slicing.
Definition: linalg.h:262
LINALG_HD auto DeviceIdx() const
Obtain the CUDA device ordinal.
Definition: linalg.h:520
size_t[kDim] ShapeT
Definition: linalg.h:264
LINALG_HD bool CContiguous() const
Whether it's a c-contiguous array.
Definition: linalg.h:496
LINALG_HD auto Stride(size_t i) const
Definition: linalg.h:481
LINALG_HD auto Shape() const
Definition: linalg.h:472
ShapeT StrideT
Definition: linalg.h:265
constexpr static size_t kDimension
Definition: linalg.h:358
LINALG_HD auto Stride() const
Definition: linalg.h:477
LINALG_HD auto Slice(S &&...slices) const
Slice the tensor. The returned tensor has inferred dim and shape. Scalar result is not supported.
Definition: linalg.h:459
LINALG_HD auto Values() const -> decltype(data_) const &
Obtain a reference to the raw data.
Definition: linalg.h:516
LINALG_HD bool Contiguous() const
Whether this is a contiguous array, both C and F contiguous returns true.
Definition: linalg.h:490
LINALG_HD T const & operator()(Index &&...index) const
Index the tensor to obtain a scalar value.
Definition: linalg.h:438
LINALG_HD TensorView(TensorView< U, kDim > const &that)
Definition: linalg.h:406
LINALG_HD TensorView(common::Span< T > data, I const (&shape)[D], int32_t device)
Create a tensor with data and shape.
Definition: linalg.h:373
LINALG_HD size_t Size() const
Number of items in the tensor.
Definition: linalg.h:486
LINALG_HD T & operator()(Index &&...index)
Index the tensor to obtain a scalar value.
Definition: linalg.h:428
LINALG_HD TensorView(common::Span< T > data, I const (&shape)[D], I const (&stride)[D], int32_t device)
Create a tensor with data, shape and strides. Don't use this constructor if stride can be calculated ...
Definition: linalg.h:392
constexpr static size_t kValueSize
Definition: linalg.h:357
LINALG_HD bool FContiguous() const
Whether it's a f-contiguous array.
Definition: linalg.h:506
LINALG_HD auto Shape(size_t i) const
Definition: linalg.h:476
A tensor storage. To use it for other functionality like slicing one needs to obtain a view first....
Definition: linalg.h:659
TensorView< T const, kDim > View(int32_t device) const
Definition: linalg.h:760
TensorView< T, kDim > View(int32_t device)
Get a TensorView for this tensor.
Definition: linalg.h:750
size_t[kDim] ShapeT
Definition: linalg.h:661
void SetDevice(int32_t device) const
Set device ordinal for this tensor.
Definition: linalg.h:832
HostDeviceVector< T > const * Data() const
Definition: linalg.h:779
void Reshape(size_t(&shape)[D])
Definition: linalg.h:825
auto HostView()
Definition: linalg.h:772
auto Shape(size_t i) const
Definition: linalg.h:776
HostDeviceVector< T > * Data()
Definition: linalg.h:778
Tensor(common::Span< I const, D > shape, int32_t device)
Definition: linalg.h:696
T & operator()(Index &&...idx)
Index operator. Not thread safe, should not be used in performance critical region....
Definition: linalg.h:735
Tensor(It begin, It end, I const (&shape)[D], int32_t device)
Definition: linalg.h:715
auto Shape() const
Definition: linalg.h:775
Tensor(std::initializer_list< T > data, I const (&shape)[D], int32_t device=Context::kCpuId)
Definition: linalg.h:723
Tensor(I const (&shape)[D], int32_t device)
Create a tensor with shape and device ordinal. The storage is initialized automatically.
Definition: linalg.h:692
void ModifyInplace(Fn &&fn)
Visitor function for modification that changes shape and data.
Definition: linalg.h:788
void Reshape(common::Span< size_t const, D > shape)
Reshape the tensor.
Definition: linalg.h:816
auto HostView() const
Definition: linalg.h:771
T const & operator()(Index &&...idx) const
Index operator. Not thread safe, should not be used in performance critical region....
Definition: linalg.h:743
size_t Size() const
Definition: linalg.h:774
void Reshape(S &&...s)
Reshape the tensor.
Definition: linalg.h:800
int32_t DeviceIdx() const
Definition: linalg.h:833
ShapeT StrideT
Definition: linalg.h:662
A device-and-host vector abstraction layer.
#define LINALG_HD
Definition: linalg.h:30
Definition: intrusive_ptr.h:207
LINALG_HD auto UnravelImpl(I idx, common::Span< size_t const, D > shape)
Definition: linalg.h:169
void ReshapeImpl(size_t(&out_shape)[D], I s)
Definition: linalg.h:189
LINALG_HD int Popc(uint32_t v)
Definition: linalg.h:131
std::remove_const_t< std::remove_reference_t< S > > RemoveCRType
Definition: linalg.h:109
constexpr int32_t CalcSliceDim()
Calculate the dimension of sliced tensor.
Definition: linalg.h:90
constexpr LINALG_HD auto UnrollLoop(Fn fn)
Definition: linalg.h:115
constexpr auto Arr2Tup(T(&arr)[N], std::index_sequence< Idx... >)
Definition: linalg.h:156
std::conditional_t< std::is_integral< RemoveCRType< S > >::value, IntTag, S > IndexToTag
Definition: linalg.h:112
constexpr size_t Offset(S(&strides)[D], size_t n, Head head)
Definition: linalg.h:48
decltype(auto) constexpr LINALG_HD Apply(Fn &&f, Tup &&t, std::index_sequence< I... >)
Definition: linalg.h:203
constexpr void CalcStride(size_t const (&shape)[D], size_t(&stride)[D])
Definition: linalg.h:61
int32_t NativePopc(T v)
Definition: linalg.h:125
std::enable_if_t< IsAllIntegral< Index... >::value > EnableIfIntegral
Definition: linalg.h:233
constexpr size_t CalcSize(size_t(&shape)[D])
Definition: linalg.h:100
constexpr detail::RangeTag< I > Range(I beg, I end)
Specify a range of elements in the axis for slicing.
Definition: linalg.h:244
auto MakeVec(T *ptr, size_t s, int32_t device=-1)
Create a vector view from contigious memory.
Definition: linalg.h:566
auto ArrayInterfaceStr(TensorView< T const, D > const &t)
Return string representation of array interface.
Definition: linalg.h:641
LINALG_HD auto UnravelIndex(size_t idx, common::Span< size_t const, D > shape)
Turns linear index into multi-dimension index. Similar to numpy unravel.
Definition: linalg.h:542
void Stack(Tensor< T, D > *l, Tensor< T, D > const &r)
Definition: linalg.h:838
constexpr detail::AllTag All()
Specify all elements in the axis for slicing.
Definition: linalg.h:239
Json ArrayInterface(TensorView< T const, D > const &t)
Array Interface defined by numpy.
Definition: linalg.h:597
auto MakeTensorView(Container &data, I const (&shape)[D], int32_t device)
Constructor for automatic type deduction.
Definition: linalg.h:528
namespace of xgboost
Definition: base.h:110
JsonInteger Integer
Definition: json.h:592
static constexpr int32_t kCpuId
Definition: generic_parameters.h:22
static constexpr char TypeChar()
Definition: linalg.h:40
constexpr size_t Size() const
Definition: linalg.h:83
I end
Definition: linalg.h:82
I beg
Definition: linalg.h:81