release_1.7.0/dev/linalg_8h_source.html

 #ifndef XGBOOST_LINALG_H_

 #define XGBOOST_LINALG_H_


 #include <dmlc/endian.h>

 #include <xgboost/base.h>

 #include <xgboost/generic_parameters.h>

 #include <xgboost/host_device_vector.h>

 #include <xgboost/json.h>

 #include <xgboost/span.h>


 #include <algorithm>

 #include <cassert>

 #include <limits>

 #include <string>

 #include <tuple>

 #include <type_traits>

 #include <utility>

 #include <vector>


 // Define LINALG_HD locally so that this header is decoupled from the rest of xgboost.

 #ifndef LINALG_HD

 #if defined(__CUDA__) || defined(__NVCC__)

 #define LINALG_HD __host__ __device__

 #else

 #define LINALG_HD

 #endif  // defined (__CUDA__) || defined(__NVCC__)

 #endif  // LINALG_HD


 namespace xgboost {

 namespace linalg {

 namespace detail {


 /*!
  * \brief Helper used when building the `typestr` field of the array interface.
  */
 struct ArrayInterfaceHandler {
   /*!
    * \brief Map a C++ type to its array-interface kind character.
    *
    * \tparam T Element type to classify.
    *
    * \return 'f' for floating point types, 'i' for signed integral types, 'u' for
    *         unsigned integral types and '\0' for every other type.
    */
   template <typename T>
   static constexpr char TypeChar() {
     if (std::is_floating_point<T>::value) {
       return 'f';
     }
     if (!std::is_integral<T>::value) {
       // Neither floating point nor integral: no kind character is available.
       return '\0';
     }
     return std::is_signed<T>::value ? 'i' : 'u';
   }
 };


 /*!
  * \brief Terminal step of the linear offset computation: fold the last index in.
  *
  * \tparam dim Axis that `head` indexes.
  *
  * \param strides Per-axis strides, in elements.
  * \param n       Offset accumulated from the preceding axes.
  * \param head    Index along axis `dim`.
  *
  * \return `n` plus the contribution of `head` along axis `dim`.
  */
 template <size_t dim, typename S, typename Head, size_t D>
 constexpr size_t Offset(S (&strides)[D], size_t n, Head head) {
   static_assert(dim < D, "");
   return head * strides[dim] + n;
 }

 /*!
  * \brief Recursive step of the linear offset computation.
  *
  * Adds the contribution of `head` along axis `dim` to the running offset and then
  * continues with the remaining indices starting from axis `dim + 1`.
  */
 template <size_t dim, typename S, size_t D, typename Head, typename... Tail>
 constexpr std::enable_if_t<sizeof...(Tail) != 0, size_t> Offset(S (&strides)[D], size_t n,
                                                                 Head head, Tail &&...rest) {
   static_assert(dim < D, "");
   size_t const accumulated = n + (head * strides[dim]);
   return Offset<dim + 1>(strides, accumulated, std::forward<Tail>(rest)...);
 }


 /*!
  * \brief Compute the strides (in elements) of a densely packed array.
  *
  * \tparam D       Number of dimensions.
  * \tparam f_array When true the first axis gets stride 1 (Fortran order), otherwise
  *                 the last axis gets stride 1 (C order).
  *
  * \param shape  Extent of each axis.
  * \param stride Output, receives the stride of each axis.
  */
 template <int32_t D, bool f_array = false>
 constexpr void CalcStride(size_t const (&shape)[D], size_t (&stride)[D]) {
   if (!f_array) {
     // C order: start from the last axis and walk towards the first.
     stride[D - 1] = 1;
     int32_t axis = D - 2;
     while (axis >= 0) {
       stride[axis] = shape[axis + 1] * stride[axis + 1];
       --axis;
     }
     return;
   }
   // Fortran order: start from the first axis and walk towards the last.
   stride[0] = 1;
   int32_t axis = 1;
   while (axis < D) {
     stride[axis] = shape[axis - 1] * stride[axis - 1];
     ++axis;
   }
 }


 /*! \brief Slice tag that selects every element along an axis. */
 struct AllTag {};

 /*! \brief Slice tag that stands in for a single integer index. */
 struct IntTag {};

 /*!
  * \brief Slice tag that selects the elements from `beg` up to, but excluding, `end`.
  */
 template <typename I>
 struct RangeTag {
   I beg;
   I end;
   /*! \brief Number of elements covered by the range. */
   constexpr size_t Size() const { return static_cast<size_t>(end - beg); }
 };

 /*!
  * \brief Number of dimensions a single slice argument contributes to the result.
  *
  * \return 0 when `T` is `IntTag` (the axis is dropped), 1 otherwise.
  */
 template <typename T>
 constexpr int32_t CalcSliceDim() {
   if (std::is_same<T, IntTag>::value) {
     return 0;
   }
   return 1;
 }

 /*!
  * \brief Number of dimensions of the tensor produced by a list of slice arguments.
  */
 template <typename T, typename... S>
 constexpr std::enable_if_t<sizeof...(S) != 0, int32_t> CalcSliceDim() {
   return CalcSliceDim<S...>() + CalcSliceDim<T>();
 }


 /*!
  * \brief Number of elements described by a shape.
  *
  * \param shape Extent of each axis.
  *
  * \return Product of all extents.
  */
 template <int32_t D>
 constexpr size_t CalcSize(size_t (&shape)[D]) {
   size_t n_elements = 1;
   for (int32_t axis = 0; axis < D; ++axis) {
     n_elements *= shape[axis];
   }
   return n_elements;
 }


 // Strip the reference, then the top-level const, from `S`.
 template <typename S>

 using RemoveCRType = std::remove_const_t<std::remove_reference_t<S>>;


 // Map a slice argument type to the tag used for dimension counting: integral types
 // (after stripping const/reference) become `IntTag`, any other type is kept as is.
 template <typename S>

 using IndexToTag = std::conditional_t<std::is_integral<RemoveCRType<S>>::value, IntTag, S>;


 // Call `fn(i)` for every i in [0, n).  `n` is a compile time constant; when compiling
 // device code (`__CUDA_ARCH__` is defined) the loop is annotated with `#pragma unroll`.
 // The function returns nothing: no return statement is present, so `auto` deduces void.
 template <int32_t n, typename Fn>

 LINALG_HD constexpr auto UnrollLoop(Fn fn) {

 #if defined __CUDA_ARCH__

 #pragma unroll n

 #endif  // defined __CUDA_ARCH__

   for (int32_t i = 0; i < n; ++i) {

     fn(i);

   }

 }


 /*!
  * \brief Portable population count used when no compiler intrinsic is available.
  *
  * \param v Value whose set bits are counted.
  *
  * \return Number of bits set in `v`.
  */
 template <typename T>
 int32_t NativePopc(T v) {
   int n_set = 0;
   while (v != 0) {
     // Clearing the lowest set bit once per iteration visits every set bit exactly once.
     v &= v - 1;
     ++n_set;
   }
   return n_set;
 }


 // Count the bits set in a 32-bit value.  The implementation is selected at compile
 // time: the `__popc` intrinsic in CUDA device code, `__builtin_popcount` for GCC/Clang,
 // `__popcnt` for MSVC, and the portable `NativePopc` loop for any other compiler.
 inline LINALG_HD int Popc(uint32_t v) {

 #if defined(__CUDA_ARCH__)

   return __popc(v);

 #elif defined(__GNUC__) || defined(__clang__)

   return __builtin_popcount(v);

 #elif defined(_MSC_VER)

   return __popcnt(v);

 #else

   return NativePopc(v);

 #endif  // compiler

 }


 // Count the bits set in a 64-bit value.  The implementation is selected at compile
 // time: the `__popcll` intrinsic in CUDA device code, `__builtin_popcountll` for
 // GCC/Clang, `__popcnt64` for MSVC, and the portable `NativePopc` loop for any other
 // compiler.
 inline LINALG_HD int Popc(uint64_t v) {

 #if defined(__CUDA_ARCH__)

   return __popcll(v);

 #elif defined(__GNUC__) || defined(__clang__)

   return __builtin_popcountll(v);

 #elif defined(_MSC_VER)

   return __popcnt64(v);

 #else

   return NativePopc(v);

 #endif  // compiler

 }


 /*!
  * \brief Implementation detail of `Arr2Tup`: expand the array through an index pack.
  *
  * \param arr Array whose elements are copied into the tuple.
  *
  * \return Tuple holding `arr[Idx]` for every index in the sequence, in order.
  */
 template <typename Elem, std::size_t kLen, std::size_t... Positions>
 constexpr auto Arr2Tup(Elem (&arr)[kLen], std::index_sequence<Positions...>) {
   return std::make_tuple(arr[Positions]...);
 }

 /*!
  * \brief Convert a C array into a tuple with one element per array entry.
  *
  * \param arr Array to convert.
  *
  * \return Tuple of the `N` array elements, in order.
  */
 template <typename Elem, std::size_t kLen>
 constexpr auto Arr2Tup(Elem (&arr)[kLen]) {
   using Positions = std::make_index_sequence<kLen>;
   return Arr2Tup(arr, Positions());
 }


 // Convert a linear index into one index per axis, returned as a tuple of size_t.
 //
 // uint division optimization inspired by the CIndexer in cupy.  Division operation is
 // slow on both CPU and GPU, especially 64 bit integer.  The integer type `I` decides the
 // width of the arithmetic, so a caller can pick a narrower type when the index is small.
 // In addition the division is replaced by a mask and a shift whenever the extent of an
 // axis is a power of 2.
 //
 // NOTE(review): an extent of 0 also takes the power-of-2 branch (`s & (s - 1)` is 0),
 // where `s - 1` wraps to all ones and the shift amount equals the bit width of `I`.
 // Confirm that callers never pass a shape containing 0.

 template <typename I, int32_t D>

 LINALG_HD auto UnravelImpl(I idx, common::Span<size_t const, D> shape) {

   size_t index[D]{0};

   static_assert(std::is_signed<decltype(D)>::value,

                 "Don't change the type without changing the for loop.");

   // Walk from the last axis down to axis 1; whatever is left of `idx` belongs to axis 0.
   for (int32_t dim = D; --dim > 0;) {

     // The extent is converted to `I`, so the arithmetic below is done at the width of I.
     auto s = static_cast<std::remove_const_t<std::remove_reference_t<I>>>(shape[dim]);

     if (s & (s - 1)) {

       // Not a power of 2: one division, the remainder is derived by multiplication.
       auto t = idx / s;

       index[dim] = idx - t * s;

       idx = t;

     } else {  // power of 2

       index[dim] = idx & (s - 1);

       // Popc(s - 1) is log2(s) when s is a power of 2.
       idx >>= Popc(s - 1);

     }

   }

   index[0] = idx;

   return Arr2Tup(index);

 }


 /*!
  * \brief Terminal step of the shape assignment: store the last extent.
  *
  * \tparam dim Axis that receives `s`.
  *
  * \param out_shape Shape array being filled.
  * \param s         Extent of axis `dim`.
  */
 template <size_t dim, typename I, int32_t D>
 void ReshapeImpl(size_t (&out_shape)[D], I s) {
   static_assert(dim < D, "");
   size_t &extent = out_shape[dim];
   extent = s;
 }

 /*!
  * \brief Recursive step of the shape assignment.
  *
  * Stores `s` as the extent of axis `dim`, then assigns the remaining extents to the
  * axes that follow.
  */
 template <size_t dim, int32_t D, typename... S, typename I,
           std::enable_if_t<sizeof...(S) != 0> * = nullptr>
 void ReshapeImpl(size_t (&out_shape)[D], I &&s, S &&...rest) {
   static_assert(dim < D, "");
   size_t &extent = out_shape[dim];
   extent = s;
   ReshapeImpl<dim + 1>(out_shape, std::forward<S>(rest)...);
 }


 /*!
  * \brief Implementation detail of `Apply`: call `f` with the selected tuple elements.
  *
  * \param f Callable to invoke.
  * \param t Tuple whose elements are passed as arguments.
  *
  * \return Whatever `f` returns, with the value category preserved.
  */
 template <typename Fn, typename Tup, size_t... I>
 LINALG_HD decltype(auto) constexpr Apply(Fn &&f, Tup &&t, std::index_sequence<I...>) {
   return f(std::get<I>(t)...);
 }

 /*!
  * \brief Call `f` with the elements of tuple `t` as individual arguments.
  *
  * \param f Callable to invoke.
  * \param t Tuple whose elements are passed as arguments.  May be an lvalue, a const
  *          lvalue or an rvalue.
  *
  * \return Whatever `f` returns, with the value category preserved.
  */
 template <typename Fn, typename Tup>
 LINALG_HD decltype(auto) constexpr Apply(Fn &&f, Tup &&t) {
   // `Tup` is deduced as a reference type when an lvalue tuple is passed, and
   // std::tuple_size is not defined for reference types, so strip the reference first.
   constexpr auto kSize = std::tuple_size<std::remove_reference_t<Tup>>::value;
   return Apply(std::forward<Fn>(f), std::forward<Tup>(t), std::make_index_sequence<kSize>{});
 }


 /*!
  * \brief Logical AND over a list of type traits.
  *
  * An empty list yields true.  Otherwise the result derives from the first trait whose
  * `value` converts to false, or from the last trait when every one of them is true.
  */
 template <class... Traits>
 struct Conjunction : std::true_type {};

 template <class Only>
 struct Conjunction<Only> : Only {};

 template <class First, class... Rest>
 struct Conjunction<First, Rest...>
     : std::conditional<static_cast<bool>(First::value), Conjunction<Rest...>, First>::type {};

 /*! \brief True when every type in `Index`, with references stripped, is integral. */
 template <typename... Index>
 using IsAllIntegral =
     Conjunction<std::is_integral<typename std::remove_reference<Index>::type>...>;

 /*! \brief SFINAE helper: names `void` only when every type in `Index` is integral. */
 template <typename... Index>
 using EnableIfIntegral = typename std::enable_if<IsAllIntegral<Index...>::value>::type;

 }  // namespace detail


 /*! \brief Slice argument that selects every element along an axis. */
 constexpr detail::AllTag All() { return detail::AllTag{}; }

 /*!
  * \brief Slice argument that selects a range of elements along an axis.
  *
  * \param beg First element of the range.
  * \param end End of the range; the range holds `end - beg` elements.
  */
 template <typename I>
 constexpr detail::RangeTag<I> Range(I beg, I end) {
   return detail::RangeTag<I>{beg, end};
 }


 // A non-owning view over a span of `T` with a compile time number of dimensions
 // `kDim`.  It stores a shape and a stride (both counted in elements) per axis and
 // implements element access and slicing on top of them.
 template <typename T, int32_t kDim>

 class TensorView {

  public:

   using ShapeT = size_t[kDim];

   using StrideT = ShapeT;


  private:

   // Stride of each axis, in elements.
   StrideT stride_{1};

   // Extent of each axis.
   ShapeT shape_{0};

   // The memory this view refers to.
   common::Span<T> data_;

   T *ptr_{nullptr};  // pointer of data_ to avoid bound check.


   // Number of elements, see CalcSize().
   size_t size_{0};

   // Device ordinal handed to the constructor; -1 by default.
   int32_t device_{-1};


   // Unlike `Tensor`, the data_ can have arbitrary size since this is just a view.
   // Recompute size_: 0 for an empty span, otherwise the product of the extents.

   LINALG_HD void CalcSize() {

     if (data_.empty()) {

       size_ = 0;

     } else {

       size_ = detail::CalcSize(shape_);

     }

   }


   // The MakeSliceDim overloads below process the slice arguments one at a time.
   // `old_dim` is the axis of this view being sliced and `new_dim` is the axis of the
   // result being written.  Every overload returns the element offset contributed by
   // the arguments it consumed and records shape/stride for each axis it keeps.

   // Range slice, last argument: keep the axis with the extent of the range.
   template <size_t old_dim, size_t new_dim, int32_t D, typename I>

   LINALG_HD size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D],

                                 detail::RangeTag<I> &&range) const {

     static_assert(new_dim < D, "");

     static_assert(old_dim < kDim, "");

     new_stride[new_dim] = stride_[old_dim];

     new_shape[new_dim] = range.Size();

     assert(static_cast<decltype(shape_[old_dim])>(range.end) <= shape_[old_dim]);


     auto offset = stride_[old_dim] * range.beg;

     return offset;

   }

   // Range slice followed by more arguments: as above, then recurse on the next axis.
   template <size_t old_dim, size_t new_dim, int32_t D, typename I, typename... S>

   LINALG_HD size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D],

                                 detail::RangeTag<I> &&range, S &&...slices) const {

     static_assert(new_dim < D, "");

     static_assert(old_dim < kDim, "");

     new_stride[new_dim] = stride_[old_dim];

     new_shape[new_dim] = range.Size();

     assert(static_cast<decltype(shape_[old_dim])>(range.end) <= shape_[old_dim]);


     auto offset = stride_[old_dim] * range.beg;

     return MakeSliceDim<old_dim + 1, new_dim + 1, D>(new_shape, new_stride,

                                                      std::forward<S>(slices)...) +

            offset;

   }


   // All() slice, last argument: keep the axis unchanged, no offset.
   template <size_t old_dim, size_t new_dim, int32_t D>

   LINALG_HD size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D], detail::AllTag) const {

     static_assert(new_dim < D, "");

     static_assert(old_dim < kDim, "");

     new_stride[new_dim] = stride_[old_dim];

     new_shape[new_dim] = shape_[old_dim];

     return 0;

   }

   // All() slice followed by more arguments: keep the axis, then recurse.
   template <size_t old_dim, size_t new_dim, int32_t D, typename... S>

   LINALG_HD size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D], detail::AllTag,

                                 S &&...slices) const {

     static_assert(new_dim < D, "");

     static_assert(old_dim < kDim, "");

     new_stride[new_dim] = stride_[old_dim];

     new_shape[new_dim] = shape_[old_dim];

     return MakeSliceDim<old_dim + 1, new_dim + 1, D>(new_shape, new_stride,

                                                      std::forward<S>(slices)...);

   }


   // Index, last argument: the axis is dropped from the result, only an offset remains.
   template <size_t old_dim, size_t new_dim, int32_t D, typename Index>

   LINALG_HD size_t MakeSliceDim(DMLC_ATTRIBUTE_UNUSED size_t new_shape[D],

                                 DMLC_ATTRIBUTE_UNUSED size_t new_stride[D], Index i) const {

     static_assert(old_dim < kDim, "");

     return stride_[old_dim] * i;

   }

   // Integer index followed by more arguments: the axis is dropped, so the recursion
   // advances `old_dim` but keeps `new_dim`.
   template <size_t old_dim, size_t new_dim, int32_t D, typename Index, typename... S>

   LINALG_HD std::enable_if_t<std::is_integral<Index>::value, size_t> MakeSliceDim(

       size_t new_shape[D], size_t new_stride[D], Index i, S &&...slices) const {

     static_assert(old_dim < kDim, "");

     auto offset = stride_[old_dim] * i;

     auto res =

         MakeSliceDim<old_dim + 1, new_dim, D>(new_shape, new_stride, std::forward<S>(slices)...);

     return res + offset;

   }


  public:

   size_t constexpr static kValueSize = sizeof(T);

   size_t constexpr static kDimension = kDim;


  public:

   // Create a view from data and shape.  The first D extents are copied from `shape`,
   // any remaining axis gets extent 1, and the strides are computed from the shape with
   // the last axis having stride 1.
   template <typename I, int32_t D>

   LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], int32_t device)

       : data_{data}, ptr_{data_.data()}, device_{device} {

     static_assert(D > 0 && D <= kDim, "Invalid shape.");

     // shape

     detail::UnrollLoop<D>([&](auto i) { shape_[i] = shape[i]; });

     for (auto i = D; i < kDim; ++i) {

       shape_[i] = 1;

     }

     // stride

     detail::CalcStride(shape_, stride_);

     // size

     this->CalcSize();

   }


   // Create a view from data, shape and explicitly provided strides.  Both arrays must
   // have exactly kDim entries.
   template <typename I, int32_t D>

   LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], I const (&stride)[D],

                        int32_t device)

       : data_{data}, ptr_{data_.data()}, device_{device} {

     static_assert(D == kDim, "Invalid shape & stride.");

     detail::UnrollLoop<D>([&](auto i) {

       shape_[i] = shape[i];

       stride_[i] = stride[i];

     });

     this->CalcSize();

   }


   // Converting constructor from a view with element type `U`, available only when
   // common::detail::IsAllowedElementTypeConversion<U, T> holds.  Data, size, device,
   // shape and strides are copied from `that`.
   template <

       typename U,

       std::enable_if_t<common::detail::IsAllowedElementTypeConversion<U, T>::value> * = nullptr>

   LINALG_HD TensorView(TensorView<U, kDim> const &that)  // NOLINT

       : data_{that.Values()}, ptr_{data_.data()}, size_{that.Size()}, device_{that.DeviceIdx()} {

     detail::UnrollLoop<kDim>([&](auto i) {

       stride_[i] = that.Stride(i);

       shape_[i] = that.Shape(i);

     });

   }


   // Index the view to obtain a reference to one element.  At most kDim integral
   // indices are accepted; axes without an index contribute nothing to the offset.
   // The bounds are checked only through `assert`.
   template <typename... Index, detail::EnableIfIntegral<Index...> * = nullptr>

   LINALG_HD T &operator()(Index &&...index) {

     static_assert(sizeof...(index) <= kDim, "Invalid index.");

     size_t offset = detail::Offset<0ul>(stride_, 0ul, std::forward<Index>(index)...);

     assert(offset < data_.size() && "Out of bound access.");

     return ptr_[offset];

   }

   // Const overload of the element access above.
   template <typename... Index, detail::EnableIfIntegral<Index...> * = nullptr>

   LINALG_HD T const &operator()(Index &&...index) const {

     static_assert(sizeof...(index) <= kDim, "Invalid index.");

     size_t offset = detail::Offset<0ul>(stride_, 0ul, std::forward<Index>(index)...);

     assert(offset < data_.size() && "Out of bound access.");

     return ptr_[offset];

   }


   // Slice the view.  Each argument is an integer index, All() or Range(beg, end).  The
   // dimension of the result equals the number of non-integer arguments; the result
   // refers to the same memory, starting at the computed offset.
   template <typename... S>

   LINALG_HD auto Slice(S &&...slices) const {

     static_assert(sizeof...(slices) <= kDim, "Invalid slice.");

     int32_t constexpr kNewDim{detail::CalcSliceDim<detail::IndexToTag<S>...>()};

     size_t new_shape[kNewDim];

     size_t new_stride[kNewDim];

     auto offset = MakeSliceDim<0, 0, kNewDim>(new_shape, new_stride, std::forward<S>(slices)...);

     // ret is a different type due to changed dimension, so we can not access its private

     // fields.

     // An empty span is passed through with offset 0.
     TensorView<T, kNewDim> ret{data_.subspan(data_.empty() ? 0 : offset), new_shape, new_stride,

                                device_};

     return ret;

   }


   // Extents of all axes, or of axis `i`.
   LINALG_HD auto Shape() const { return common::Span<size_t const, kDim>{shape_}; }

   LINALG_HD auto Shape(size_t i) const { return shape_[i]; }

   // Strides of all axes, or of axis `i`, in elements.
   LINALG_HD auto Stride() const { return common::Span<size_t const, kDim>{stride_}; }

   LINALG_HD auto Stride(size_t i) const { return stride_[i]; }


   // Number of elements in the view.
   LINALG_HD size_t Size() const { return size_; }

   // True when the span holds exactly Size() elements, or when the strides match either
   // the C or the Fortran layout derived from the shape.
   LINALG_HD bool Contiguous() const {

     return data_.size() == this->Size() || this->CContiguous() || this->FContiguous();

   }

   // True when the strides equal the ones CalcStride derives for C order.
   LINALG_HD bool CContiguous() const {

     StrideT stride;

     static_assert(std::is_same<decltype(stride), decltype(stride_)>::value, "");

     // It's contiguous if the stride can be calculated from shape.

     detail::CalcStride(shape_, stride);

     return common::Span<size_t const, kDim>{stride_} == common::Span<size_t const, kDim>{stride};

   }

   // True when the strides equal the ones CalcStride derives for Fortran order.
   LINALG_HD bool FContiguous() const {

     StrideT stride;

     static_assert(std::is_same<decltype(stride), decltype(stride_)>::value, "");

     // It's contiguous if the stride can be calculated from shape.

     detail::CalcStride<kDim, true>(shape_, stride);

     return common::Span<size_t const, kDim>{stride_} == common::Span<size_t const, kDim>{stride};

   }

   // The underlying span of data.
   LINALG_HD auto Values() const -> decltype(data_) const & { return data_; }

   // The device ordinal stored in this view.
   LINALG_HD auto DeviceIdx() const { return device_; }

 };


 template <typename Container, typename I, int32_t D,

           std::enable_if_t<!common::detail::IsSpan<Container>::value> * = nullptr>

 auto MakeTensorView(Container &data, I const (&shape)[D], int32_t device) {  // NOLINT

   using T = typename Container::value_type;

   return TensorView<T, D>{data, shape, device};

 }


 /*!
  * \brief Create a tensor view over a span, deducing element type and dimension.
  *
  * \param data   Span providing the storage.
  * \param shape  Extent of each axis.
  * \param device Device ordinal stored in the view.
  */
 template <typename T, typename I, int32_t D>
 LINALG_HD auto MakeTensorView(common::Span<T> data, I const (&shape)[D], int32_t device) {
   TensorView<T, D> view{data, shape, device};
   return view;
 }


 /*!
  * \brief Turn a linear index into one index per axis.
  *
  * The computation is carried out with 32-bit integers whenever the linear index fits,
  * and with 64-bit integers otherwise.
  *
  * \param idx   Linear index.
  * \param shape Extent of each axis.
  *
  * \return Tuple with one index per axis.
  */
 template <size_t D>
 LINALG_HD auto UnravelIndex(size_t idx, common::Span<size_t const, D> shape) {
   bool const fits_in_32_bits = !(idx > std::numeric_limits<uint32_t>::max());
   if (fits_in_32_bits) {
     return detail::UnravelImpl<uint32_t, D>(static_cast<uint32_t>(idx), shape);
   }
   return detail::UnravelImpl<uint64_t, D>(static_cast<uint64_t>(idx), shape);
 }


 // A view with a single dimension.
 template <typename T>

 using VectorView = TensorView<T, 1>;


 template <typename T>

 auto MakeVec(T *ptr, size_t s, int32_t device = -1) {

   return linalg::TensorView<T, 1>{{ptr, s}, {s}, device};

 }


 /*!
  * \brief Create a 1-dimensional view over a HostDeviceVector.
  *
  * The host pointer is used when the device ordinal of the vector is -1, the device
  * pointer otherwise.
  */
 template <typename T>
 auto MakeVec(HostDeviceVector<T> *data) {
   bool const on_host = data->DeviceIdx() == -1;
   auto *ptr = on_host ? data->HostPointer() : data->DevicePointer();
   return MakeVec(ptr, data->Size(), data->DeviceIdx());
 }


 /*!
  * \brief Create a read-only 1-dimensional view over a const HostDeviceVector.
  *
  * The host pointer is used when the device ordinal of the vector is -1, the device
  * pointer otherwise.
  */
 template <typename T>
 auto MakeVec(HostDeviceVector<T> const *data) {
   bool const on_host = data->DeviceIdx() == -1;
   auto *ptr = on_host ? data->ConstHostPointer() : data->ConstDevicePointer();
   return MakeVec(ptr, data->Size(), data->DeviceIdx());
 }


 // A view with two dimensions.
 template <typename T>

 using MatrixView = TensorView<T, 2>;


 /*!
  * \brief Describe a read-only view as an array interface JSON object.
  *
  * The object holds the data pointer together with a read-only flag set to true, the
  * shape, the strides in bytes, the version and the type string.  A null "stream" entry
  * is added when the device ordinal of the view is not negative.
  *
  * \param t View to describe.
  *
  * \return JSON object describing `t`.
  */
 template <typename T, int32_t D>
 Json ArrayInterface(TensorView<T const, D> const &t) {
   char constexpr kT = detail::ArrayInterfaceHandler::TypeChar<T>();
   static_assert(kT != '\0', "");

   Json array_interface{Object{}};

   // data: [pointer, read-only flag]
   array_interface["data"] = std::vector<Json>(2);
   array_interface["data"][0] = Integer{reinterpret_cast<int64_t>(t.Values().data())};
   array_interface["data"][1] = Boolean{true};
   if (t.DeviceIdx() >= 0) {
     // Change this once we have different CUDA stream.
     array_interface["stream"] = Null{};
   }

   // shape in elements, strides in bytes
   auto const n_dims = t.Shape().size();
   std::vector<Json> shape(n_dims);
   std::vector<Json> stride(t.Stride().size());
   for (size_t i = 0; i < n_dims; ++i) {
     shape[i] = Integer(t.Shape(i));
     stride[i] = Integer(t.Stride(i) * sizeof(T));
   }
   array_interface["shape"] = Array{shape};
   array_interface["strides"] = Array{stride};
   array_interface["version"] = 3;

   // typestr: byte order marker, kind character, then the item size in bytes.
   std::string const kind_and_size = kT + std::to_string(sizeof(T));
   array_interface["typestr"] = String{(DMLC_LITTLE_ENDIAN ? "<" : ">") + kind_and_size};
   return array_interface;
 }


 /*!
  * \brief Describe a writable view as an array interface JSON object.
  *
  * Builds the description through the read-only overload and then sets the read-only
  * flag stored in the "data" entry to false.
  *
  * \param t View to describe.
  *
  * \return JSON object describing `t`.
  */
 template <typename T, int32_t D>
 Json ArrayInterface(TensorView<T, D> const &t) {
   TensorView<T const, D> const &read_only = t;
   auto description = ArrayInterface(read_only);
   description["data"][1] = Boolean{false};
   return description;
 }


 /*!
  * \brief Serialize the array interface of a read-only view into a string.
  *
  * \param t View to describe.
  *
  * \return The JSON document produced by `Json::Dump`.
  */
 template <typename T, int32_t D>
 auto ArrayInterfaceStr(TensorView<T const, D> const &t) {
   std::string serialized;
   Json::Dump(ArrayInterface(t), &serialized);
   return serialized;
 }


 /*!
  * \brief Serialize the array interface of a writable view into a string.
  *
  * \param t View to describe.
  *
  * \return The JSON document produced by `Json::Dump`.
  */
 template <typename T, int32_t D>
 auto ArrayInterfaceStr(TensorView<T, D> const &t) {
   std::string serialized;
   Json::Dump(ArrayInterface(t), &serialized);
   return serialized;
 }


 // Owning tensor storage: a HostDeviceVector holding the elements plus a shape with
 // `kDim` extents.  Indexing and slicing go through the views returned by View() and
 // HostView().
 template <typename T, int32_t kDim = 5>

 class Tensor {

  public:

   using ShapeT = size_t[kDim];

   using StrideT = ShapeT;


  private:

   // Element storage.
   HostDeviceVector<T> data_;

   // Extent of each axis.
   ShapeT shape_{0};


   // Shared tail of the constructors that receive the data up front: copy the first D
   // extents, give every remaining axis extent 1, move the data to `device` when it is
   // not negative, then check that the number of stored elements matches the shape.
   template <typename I, std::int32_t D>

   void Initialize(I const (&shape)[D], std::int32_t device) {

     static_assert(D <= kDim, "Invalid shape.");

     std::copy(shape, shape + D, shape_);

     for (auto i = D; i < kDim; ++i) {

       shape_[i] = 1;

     }

     if (device >= 0) {

       data_.SetDevice(device);

       data_.ConstDevicePointer();  // Pull to device;

     }

     CHECK_EQ(data_.Size(), detail::CalcSize(shape_));

   }


  public:

   Tensor() = default;


   // Create a tensor from a shape array and a device ordinal; forwards to the span
   // overload below, which resizes the storage to match the shape.
   template <typename I, int32_t D>

   explicit Tensor(I const (&shape)[D], int32_t device)

       : Tensor{common::Span<I const, D>{shape}, device} {}


   // Create a tensor from a shape span and a device ordinal.  The storage is resized
   // to the product of the extents.
   // NOTE(review): unlike Initialize() there is no static_assert(D <= kDim) here, while
   // std::copy writes D entries into shape_, which has kDim entries.
   template <typename I, size_t D>

   explicit Tensor(common::Span<I const, D> shape, int32_t device) {

     // No device unroll as this is a host only function.

     std::copy(shape.data(), shape.data() + D, shape_);

     for (auto i = D; i < kDim; ++i) {

       shape_[i] = 1;

     }

     auto size = detail::CalcSize(shape_);

     if (device >= 0) {

       data_.SetDevice(device);

     }

     data_.Resize(size);

     if (device >= 0) {

       data_.DevicePointer();  // Pull to device

     }

   }

   // Create a tensor from the elements in [begin, end) and a shape.  The number of
   // elements must equal the product of the extents (checked in Initialize()).
   template <typename It, typename I, int32_t D>

   explicit Tensor(It begin, It end, I const (&shape)[D], int32_t device) {

     auto &h_vec = data_.HostVector();

     h_vec.insert(h_vec.begin(), begin, end);

     // shape

     this->Initialize(shape, device);

   }


   // Create a tensor from an initializer list and a shape.  The number of elements
   // must equal the product of the extents (checked in Initialize()).
   template <typename I, int32_t D>

   explicit Tensor(std::initializer_list<T> data, I const (&shape)[D],

                   int32_t device = Context::kCpuId) {

     auto &h_vec = data_.HostVector();

     h_vec = data;

     // shape

     this->Initialize(shape, device);

   }

   // Element access; a host view is created on every call and indexed with `idx`.
   template <typename... Index>

   T &operator()(Index &&...idx) {

     return this->HostView()(std::forward<Index>(idx)...);

   }

   // Const overload of the element access above.
   template <typename... Index>

   T const &operator()(Index &&...idx) const {

     return this->HostView()(std::forward<Index>(idx)...);

   }


   // Get a view of this tensor.  With a non-negative `device` the storage is assigned
   // to that device and the view covers the device span, otherwise it covers the host
   // span.
   TensorView<T, kDim> View(int32_t device) {

     if (device >= 0) {

       data_.SetDevice(device);

       auto span = data_.DeviceSpan();

       return {span, shape_, device};

     } else {

       auto span = data_.HostSpan();

       return {span, shape_, device};

     }

   }

   // Read-only counterpart of View().
   TensorView<T const, kDim> View(int32_t device) const {

     if (device >= 0) {

       data_.SetDevice(device);

       auto span = data_.ConstDeviceSpan();

       return {span, shape_, device};

     } else {

       auto span = data_.ConstHostSpan();

       return {span, shape_, device};

     }

   }


   // Host views, equivalent to View(-1).
   auto HostView() const { return this->View(-1); }

   auto HostView() { return this->View(-1); }


   // Number of stored elements.
   size_t Size() const { return data_.Size(); }

   // Extents of all axes, or of axis `i`.
   auto Shape() const { return common::Span<size_t const, kDim>{shape_}; }

   auto Shape(size_t i) const { return shape_[i]; }


   // Access to the underlying storage.
   HostDeviceVector<T> *Data() { return &data_; }

   HostDeviceVector<T> const *Data() const { return &data_; }


   // Visitor for modifications that change both data and shape: `fn` receives the
   // storage and a mutable span over the shape.  Afterwards the number of stored
   // elements must match the product of the extents.
   template <typename Fn>

   void ModifyInplace(Fn &&fn) {

     fn(this->Data(), common::Span<size_t, kDim>{this->shape_});

     CHECK_EQ(this->Data()->Size(), detail::CalcSize(this->shape_))

         << "Inconsistent size after modification.";

   }


   // Reshape from a list of integral extents.  Axes without an extent get 1 and the
   // storage is resized to the product of the extents.
   template <typename... S, detail::EnableIfIntegral<S...> * = nullptr>

   void Reshape(S &&...s) {

     static_assert(sizeof...(S) <= kDim, "Invalid shape.");

     detail::ReshapeImpl<0>(shape_, std::forward<S>(s)...);

     auto constexpr kEnd = sizeof...(S);

     static_assert(kEnd <= kDim, "Invalid shape.");

     std::fill(shape_ + kEnd, shape_ + kDim, 1);

     auto n = detail::CalcSize(shape_);

     data_.Resize(n);

   }


   // Reshape from a span of extents.  Axes beyond D get 1 and the storage is resized
   // to the product of the extents.
   template <size_t D>

   void Reshape(common::Span<size_t const, D> shape) {

     static_assert(D <= kDim, "Invalid shape.");

     std::copy(shape.data(), shape.data() + D, this->shape_);

     std::fill(shape_ + D, shape_ + kDim, 1);

     auto n = detail::CalcSize(shape_);

     data_.Resize(n);

   }


   // Reshape from an array of extents; forwards to the span overload.
   template <size_t D>

   void Reshape(size_t (&shape)[D]) {

     this->Reshape(common::Span<size_t const, D>{shape});

   }


   // Forward the device ordinal to, and read it from, the underlying storage.
   void SetDevice(int32_t device) const { data_.SetDevice(device); }

   int32_t DeviceIdx() const { return data_.DeviceIdx(); }

 };


 /*!
  * \brief Append the content of `r` to `l` along the first axis, in place.
  *
  * Only first axis is supported for now.  Every other axis of `l` must either have
  * extent 0, in which case it takes the extent of `r`, or match the extent of `r`.
  * When `r` has a non-negative device ordinal, `l` is assigned to that device first.
  *
  * \param l Tensor that is extended.
  * \param r Tensor whose elements are appended.
  */
 template <typename T, int32_t D>
 void Stack(Tensor<T, D> *l, Tensor<T, D> const &r) {
   auto const r_device = r.DeviceIdx();
   if (r_device >= 0) {
     l->SetDevice(r_device);
   }
   auto append = [&](HostDeviceVector<T> *data, common::Span<size_t, D> shape) {
     for (size_t i = 1; i < D; ++i) {
       if (shape[i] != 0) {
         CHECK_EQ(shape[i], r.Shape(i));
       } else {
         // Extent not set yet, adopt the one of `r`.
         shape[i] = r.Shape(i);
       }
     }
     data->Extend(*r.Data());
     shape[0] = l->Shape(0) + r.Shape(0);
   };
   l->ModifyInplace(append);
 }

 }  // namespace linalg

 }  // namespace xgboost


 #if defined(LINALG_HD)

 #undef LINALG_HD

 #endif  // defined(LINALG_HD)

 #endif  // XGBOOST_LINALG_H_

base.h
defines configuration macros of xgboost.

xgboost::HostDeviceVector
Definition: host_device_vector.h:86

xgboost::HostDeviceVector::ConstDevicePointer
const T * ConstDevicePointer() const

xgboost::HostDeviceVector::Size
size_t Size() const

xgboost::HostDeviceVector::Extend
void Extend(const HostDeviceVector< T > &other)

xgboost::HostDeviceVector::ConstHostSpan
common::Span< T const  > ConstHostSpan() const
Definition: host_device_vector.h:114

xgboost::HostDeviceVector::HostVector
std::vector< T > & HostVector()

xgboost::HostDeviceVector::Resize
void Resize(size_t new_size, T v=T())

xgboost::HostDeviceVector::ConstDeviceSpan
common::Span< const T > ConstDeviceSpan() const

xgboost::HostDeviceVector::HostPointer
T * HostPointer()
Definition: host_device_vector.h:111

xgboost::HostDeviceVector::DeviceIdx
int DeviceIdx() const

xgboost::HostDeviceVector::SetDevice
void SetDevice(int device) const

xgboost::HostDeviceVector::DeviceSpan
common::Span< T > DeviceSpan()

xgboost::HostDeviceVector::HostSpan
common::Span< T > HostSpan()
Definition: host_device_vector.h:112

xgboost::HostDeviceVector::DevicePointer
T * DevicePointer()

xgboost::HostDeviceVector::ConstHostPointer
const T * ConstHostPointer() const
Definition: host_device_vector.h:115

xgboost::JsonArray
Definition: json.h:112

xgboost::JsonBoolean
Describes both true and false.
Definition: json.h:311

xgboost::JsonInteger
Definition: json.h:252

xgboost::JsonNull
Definition: json.h:295

xgboost::JsonObject
Definition: json.h:189

xgboost::JsonString
Definition: json.h:86

xgboost::Json
Data structure representing JSON format.
Definition: json.h:356

xgboost::Json::Dump
static void Dump(Json json, std::string *out, std::ios::openmode mode=std::ios::out)
Encode the JSON object. Optional parameter mode for choosing between text and binary (ubjson) output.

xgboost::common::Span
span class implementation, based on ISO C++20 span<T>. The interface should be the same.
Definition: span.h:423

xgboost::common::Span::data
constexpr XGBOOST_DEVICE pointer data() const __span_noexcept
Definition: span.h:548

xgboost::common::Span::subspan
XGBOOST_DEVICE auto subspan() const -> Span< element_type, detail::ExtentValue< Extent, Offset, Count >::value >
Definition: span.h:595

xgboost::common::Span::size
constexpr XGBOOST_DEVICE index_type size() const __span_noexcept
Definition: span.h:553

xgboost::common::Span::empty
constexpr XGBOOST_DEVICE bool empty() const __span_noexcept
Definition: span.h:560

xgboost::linalg::TensorView
A tensor view with static type and dimension. It implements indexing and slicing.
Definition: linalg.h:262

xgboost::linalg::TensorView::DeviceIdx
LINALG_HD auto DeviceIdx() const
Obtain the CUDA device ordinal.
Definition: linalg.h:520

xgboost::linalg::TensorView::ShapeT
size_t[kDim] ShapeT
Definition: linalg.h:264

xgboost::linalg::TensorView::CContiguous
LINALG_HD bool CContiguous() const
Whether it's a c-contiguous array.
Definition: linalg.h:496

xgboost::linalg::TensorView::Stride
LINALG_HD auto Stride(size_t i) const
Definition: linalg.h:481

xgboost::linalg::TensorView::Shape
LINALG_HD auto Shape() const
Definition: linalg.h:472

xgboost::linalg::TensorView::StrideT
ShapeT StrideT
Definition: linalg.h:265

xgboost::linalg::TensorView::kDimension
constexpr static size_t kDimension
Definition: linalg.h:358

xgboost::linalg::TensorView::Stride
LINALG_HD auto Stride() const
Definition: linalg.h:477

xgboost::linalg::TensorView::Slice
LINALG_HD auto Slice(S &&...slices) const
Slice the tensor. The returned tensor has inferred dim and shape. Scalar result is not supported.
Definition: linalg.h:459

xgboost::linalg::TensorView::Values
LINALG_HD auto Values() const -> decltype(data_) const &
Obtain a reference to the raw data.
Definition: linalg.h:516

xgboost::linalg::TensorView::Contiguous
LINALG_HD bool Contiguous() const
Whether this is a contiguous array, both C and F contiguous returns true.
Definition: linalg.h:490

xgboost::linalg::TensorView::operator()
LINALG_HD T const  & operator()(Index &&...index) const
Index the tensor to obtain a scalar value.
Definition: linalg.h:438

xgboost::linalg::TensorView::TensorView
LINALG_HD TensorView(TensorView< U, kDim > const &that)
Definition: linalg.h:406

xgboost::linalg::TensorView::TensorView
LINALG_HD TensorView(common::Span< T > data, I const (&shape)[D], int32_t device)
Create a tensor with data and shape.
Definition: linalg.h:373

xgboost::linalg::TensorView::Size
LINALG_HD size_t Size() const
Number of items in the tensor.
Definition: linalg.h:486

xgboost::linalg::TensorView::operator()
LINALG_HD T & operator()(Index &&...index)
Index the tensor to obtain a scalar value.
Definition: linalg.h:428

xgboost::linalg::TensorView::TensorView
LINALG_HD TensorView(common::Span< T > data, I const (&shape)[D], I const (&stride)[D], int32_t device)
Create a tensor with data, shape and strides. Don't use this constructor if stride can be calculated ...
Definition: linalg.h:392

xgboost::linalg::TensorView::kValueSize
constexpr static size_t kValueSize
Definition: linalg.h:357

xgboost::linalg::TensorView::FContiguous
LINALG_HD bool FContiguous() const
Whether it's a f-contiguous array.
Definition: linalg.h:506

xgboost::linalg::TensorView::Shape
LINALG_HD auto Shape(size_t i) const
Definition: linalg.h:476

xgboost::linalg::Tensor
A tensor storage. To use it for other functionality like slicing one needs to obtain a view first....
Definition: linalg.h:659

xgboost::linalg::Tensor::View
TensorView< T const, kDim > View(int32_t device) const
Definition: linalg.h:760

xgboost::linalg::Tensor::View
TensorView< T, kDim > View(int32_t device)
Get a TensorView for this tensor.
Definition: linalg.h:750

xgboost::linalg::Tensor::ShapeT
size_t[kDim] ShapeT
Definition: linalg.h:661

xgboost::linalg::Tensor::SetDevice
void SetDevice(int32_t device) const
Set device ordinal for this tensor.
Definition: linalg.h:832

xgboost::linalg::Tensor::Data
HostDeviceVector< T > const  * Data() const
Definition: linalg.h:779

xgboost::linalg::Tensor::Reshape
void Reshape(size_t(&shape)[D])
Definition: linalg.h:825

xgboost::linalg::Tensor::HostView
auto HostView()
Definition: linalg.h:772

xgboost::linalg::Tensor::Shape
auto Shape(size_t i) const
Definition: linalg.h:776

xgboost::linalg::Tensor::Data
HostDeviceVector< T > * Data()
Definition: linalg.h:778

xgboost::linalg::Tensor::Tensor
Tensor(common::Span< I const, D > shape, int32_t device)
Definition: linalg.h:696

xgboost::linalg::Tensor::operator()
T & operator()(Index &&...idx)
Index operator. Not thread safe, should not be used in performance critical region....
Definition: linalg.h:735

xgboost::linalg::Tensor::Tensor
Tensor(It begin, It end, I const (&shape)[D], int32_t device)
Definition: linalg.h:715

xgboost::linalg::Tensor::Shape
auto Shape() const
Definition: linalg.h:775

xgboost::linalg::Tensor::Tensor
Tensor(std::initializer_list< T > data, I const (&shape)[D], int32_t device=Context::kCpuId)
Definition: linalg.h:723

xgboost::linalg::Tensor::Tensor
Tensor(I const (&shape)[D], int32_t device)
Create a tensor with shape and device ordinal. The storage is initialized automatically.
Definition: linalg.h:692

xgboost::linalg::Tensor::Tensor
Tensor()=default

xgboost::linalg::Tensor::ModifyInplace
void ModifyInplace(Fn &&fn)
Visitor function for modification that changes shape and data.
Definition: linalg.h:788

xgboost::linalg::Tensor::Reshape
void Reshape(common::Span< size_t const, D > shape)
Reshape the tensor.
Definition: linalg.h:816

xgboost::linalg::Tensor::HostView
auto HostView() const
Definition: linalg.h:771

xgboost::linalg::Tensor::operator()
T const  & operator()(Index &&...idx) const
Index operator. Not thread safe, should not be used in performance critical region....
Definition: linalg.h:743

xgboost::linalg::Tensor::Size
size_t Size() const
Definition: linalg.h:774

xgboost::linalg::Tensor::Reshape
void Reshape(S &&...s)
Reshape the tensor.
Definition: linalg.h:800

xgboost::linalg::Tensor::DeviceIdx
int32_t DeviceIdx() const
Definition: linalg.h:833

xgboost::linalg::Tensor::StrideT
ShapeT StrideT
Definition: linalg.h:662

generic_parameters.h

host_device_vector.h
A device-and-host vector abstraction layer.

json.h

LINALG_HD
#define LINALG_HD
Definition: linalg.h:30

std
Definition: intrusive_ptr.h:207

xgboost::linalg::detail::UnravelImpl
LINALG_HD auto UnravelImpl(I idx, common::Span< size_t const, D > shape)
Definition: linalg.h:169

xgboost::linalg::detail::ReshapeImpl
void ReshapeImpl(size_t(&out_shape)[D], I s)
Definition: linalg.h:189

xgboost::linalg::detail::Popc
LINALG_HD int Popc(uint32_t v)
Definition: linalg.h:131

xgboost::linalg::detail::RemoveCRType
std::remove_const_t< std::remove_reference_t< S > > RemoveCRType
Definition: linalg.h:109

xgboost::linalg::detail::CalcSliceDim
constexpr int32_t CalcSliceDim()
Calculate the dimension of sliced tensor.
Definition: linalg.h:90

xgboost::linalg::detail::UnrollLoop
constexpr LINALG_HD auto UnrollLoop(Fn fn)
Definition: linalg.h:115

xgboost::linalg::detail::Arr2Tup
constexpr auto Arr2Tup(T(&arr)[N], std::index_sequence< Idx... >)
Definition: linalg.h:156

xgboost::linalg::detail::IndexToTag
std::conditional_t< std::is_integral< RemoveCRType< S > >::value, IntTag, S > IndexToTag
Definition: linalg.h:112

xgboost::linalg::detail::Offset
constexpr size_t Offset(S(&strides)[D], size_t n, Head head)
Definition: linalg.h:48

xgboost::linalg::detail::Apply
decltype(auto) constexpr LINALG_HD Apply(Fn &&f, Tup &&t, std::index_sequence< I... >)
Definition: linalg.h:203

xgboost::linalg::detail::CalcStride
constexpr void CalcStride(size_t const (&shape)[D], size_t(&stride)[D])
Definition: linalg.h:61

xgboost::linalg::detail::NativePopc
int32_t NativePopc(T v)
Definition: linalg.h:125

xgboost::linalg::detail::EnableIfIntegral
std::enable_if_t< IsAllIntegral< Index... >::value > EnableIfIntegral
Definition: linalg.h:233

xgboost::linalg::detail::CalcSize
constexpr size_t CalcSize(size_t(&shape)[D])
Definition: linalg.h:100

xgboost::linalg::Range
constexpr detail::RangeTag< I > Range(I beg, I end)
Specify a range of elements in the axis for slicing.
Definition: linalg.h:244

xgboost::linalg::MakeVec
auto MakeVec(T *ptr, size_t s, int32_t device=-1)
Create a vector view from contiguous memory.
Definition: linalg.h:566

xgboost::linalg::ArrayInterfaceStr
auto ArrayInterfaceStr(TensorView< T const, D > const &t)
Return string representation of array interface.
Definition: linalg.h:641

xgboost::linalg::UnravelIndex
LINALG_HD auto UnravelIndex(size_t idx, common::Span< size_t const, D > shape)
Turns linear index into multi-dimension index. Similar to numpy unravel.
Definition: linalg.h:542

xgboost::linalg::Stack
void Stack(Tensor< T, D > *l, Tensor< T, D > const &r)
Definition: linalg.h:838

xgboost::linalg::All
constexpr detail::AllTag All()
Specify all elements in the axis for slicing.
Definition: linalg.h:239

xgboost::linalg::ArrayInterface
Json ArrayInterface(TensorView< T const, D > const &t)
Array Interface defined by numpy.
Definition: linalg.h:597

xgboost::linalg::MakeTensorView
auto MakeTensorView(Container &data, I const (&shape)[D], int32_t device)
Constructor for automatic type deduction.
Definition: linalg.h:528

xgboost
namespace of xgboost
Definition: base.h:110

xgboost::Integer
JsonInteger Integer
Definition: json.h:592

span.h

xgboost::GenericParameter::kCpuId
static constexpr int32_t kCpuId
Definition: generic_parameters.h:22

xgboost::linalg::detail::AllTag
Definition: linalg.h:75

xgboost::linalg::detail::ArrayInterfaceHandler
Definition: linalg.h:38

xgboost::linalg::detail::ArrayInterfaceHandler::TypeChar
static constexpr char TypeChar()
Definition: linalg.h:40

xgboost::linalg::detail::Conjunction< B1 >
Definition: linalg.h:225

xgboost::linalg::detail::Conjunction
Definition: linalg.h:223

xgboost::linalg::detail::IntTag
Definition: linalg.h:77

xgboost::linalg::detail::RangeTag
Definition: linalg.h:80

xgboost::linalg::detail::RangeTag::Size
constexpr size_t Size() const
Definition: linalg.h:83

xgboost::linalg::detail::RangeTag::end
I end
Definition: linalg.h:82

xgboost::linalg::detail::RangeTag::beg
I beg
Definition: linalg.h:81