12 #include "device_helpers.cuh" 22 *byte |= 1 << bit_idx;
26 return byte & (1 << bit_idx);
29 *byte &= ~(1 << bit_idx);
31 static const int kPadding = 4;
35 static size_t SymbolBits(
size_t num_symbols) {
36 auto bits = std::ceil(std::log2(num_symbols));
37 return std::max(static_cast<size_t>(bits),
size_t(1));
59 symbol_bits_ = detail::SymbolBits(num_symbols);
79 const int bits_per_byte = 8;
80 size_t compressed_size =
static_cast<size_t>(std::ceil(
81 static_cast<double>(detail::SymbolBits(num_symbols) * num_elements) /
83 return compressed_size + detail::kPadding;
88 const int bits_per_byte = 8;
90 for (
size_t i = 0; i < symbol_bits_; i++) {
91 size_t byte_idx = ((offset + 1) * symbol_bits_ - (i + 1)) / bits_per_byte;
92 byte_idx += detail::kPadding;
94 ((bits_per_byte + i) - ((offset + 1) * symbol_bits_)) % bits_per_byte;
105 __device__
void AtomicWriteSymbol
107 size_t ibit_start = offset * symbol_bits_;
108 size_t ibit_end = (offset + 1) * symbol_bits_ - 1;
109 size_t ibyte_start = ibit_start / 8, ibyte_end = ibit_end / 8;
111 symbol <<= 7 - ibit_end % 8;
113 dh::AtomicOrByte(reinterpret_cast<unsigned int*>(buffer + detail::kPadding),
114 ibyte, symbol & 0xff);
120 template <
typename IterT>
123 size_t stored_bits = 0;
124 const size_t max_stored_bits = 64 - symbol_bits_;
125 size_t buffer_position = detail::kPadding;
126 const size_t num_symbols = input_end - input_begin;
127 for (
size_t i = 0; i < num_symbols; i++) {
128 typename std::iterator_traits<IterT>::value_type symbol = input_begin[i];
129 if (stored_bits > max_stored_bits) {
131 size_t tmp_bytes = stored_bits / 8;
132 for (
size_t j = 0; j < tmp_bytes; j++) {
134 tmp >> (stored_bits - (j + 1) * 8));
137 stored_bits -= tmp_bytes * 8;
138 tmp &= (1 << stored_bits) - 1;
141 tmp <<= symbol_bits_;
143 stored_bits += symbol_bits_;
148 static_cast<int>(std::ceil(static_cast<float>(stored_bits) / 8));
149 for (
int j = 0; j < tmp_bytes; j++) {
150 int shift_bits =
static_cast<int>(stored_bits) - (j + 1) * 8;
151 if (shift_bits >= 0) {
152 buffer[buffer_position] =
155 buffer[buffer_position] =
163 template <
typename T>
192 : buffer_(buffer), offset_(0) {
193 symbol_bits_ = detail::SymbolBits(num_symbols);
197 const int bits_per_byte = 8;
198 size_t start_bit_idx = ((offset_ + 1) * symbol_bits_ - 1);
199 size_t start_byte_idx = start_bit_idx / bits_per_byte;
200 start_byte_idx += detail::kPadding;
203 uint64_t tmp =
static_cast<uint64_t
>(buffer_[start_byte_idx - 4]) << 32 |
204 static_cast<uint64_t>(buffer_[start_byte_idx - 3]) << 24 |
205 static_cast<uint64_t
>(buffer_[start_byte_idx - 2]) << 16 |
206 static_cast<uint64_t>(buffer_[start_byte_idx - 1]) << 8 |
207 buffer_[start_byte_idx];
209 (bits_per_byte - ((offset_ + 1) * symbol_bits_)) % bits_per_byte;
212 uint64_t mask = (
static_cast<uint64_t
>(1) << symbol_bits_) - 1;
213 return static_cast<T
>(tmp & mask);
217 self_type offset = (*this);
218 offset.offset_ += idx;
byte
Definition: span.h:112
T CheckBit(const T &byte, int bit_idx)
Definition: compressed_iterator.h:25
value_type * pointer
Definition: compressed_iterator.h:181
Writes bit-compressed symbols to a memory buffer. Use CompressedIterator to read symbols back from the buffer.
Definition: compressed_iterator.h:52
CompressedIterator()
Definition: compressed_iterator.h:190
CompressedBufferWriter(size_t num_symbols)
Definition: compressed_iterator.h:58
XGBOOST_DEVICE reference operator[](size_t idx) const
Definition: compressed_iterator.h:216
static size_t CalculateBufferSize(size_t num_elements, size_t num_symbols)
Calculates the number of bytes required for a given number of elements and a symbol range.
Definition: compressed_iterator.h:78
void Write(CompressedByteT *buffer, IterT input_begin, IterT input_end)
Definition: compressed_iterator.h:121
void ClearBit(CompressedByteT *byte, int bit_idx)
Definition: compressed_iterator.h:28
CompressedIterator(CompressedByteT *buffer, int num_symbols)
Definition: compressed_iterator.h:191
value_type reference
Definition: compressed_iterator.h:182
XGBOOST_DEVICE reference operator*() const
Definition: compressed_iterator.h:196
void SetBit(CompressedByteT *byte, int bit_idx)
Definition: compressed_iterator.h:21
#define XGBOOST_DEVICE
Tag function as usable by device.
Definition: base.h:84
namespace of xgboost
Definition: base.h:102
CompressedIterator< T > self_type
Definition: compressed_iterator.h:178
typename std::conditional< std::is_same< std::ptrdiff_t, std::int64_t >::value, std::ptrdiff_t, std::int64_t >::type ptrdiff_t
Definition: span.h:102
defines configuration macros of xgboost.
ptrdiff_t difference_type
Definition: compressed_iterator.h:179
unsigned char CompressedByteT
Definition: compressed_iterator.h:18
T value_type
Definition: compressed_iterator.h:180
Read symbols from a bit compressed memory buffer. Usable on device and host.
Definition: compressed_iterator.h:175
void WriteSymbol(CompressedByteT *buffer, T symbol, size_t offset)
Definition: compressed_iterator.h:87