14 #include "device_helpers.cuh"
// Sets bit `bit_idx` in the byte that `byte` points to by OR-ing in a one-bit mask.
// (The enclosing function's signature is not visible in this excerpt.)
24 *
byte |= 1 << bit_idx;
// CheckBit: masks `byte` with (1 << bit_idx) and returns the result as T, so the
// value is non-zero when that bit is set and zero otherwise.
// NOTE(review): the mask is built from an `int` literal, so it only covers bit
// positions that fit in an int - confirm T is never wider than that.
27 inline T
CheckBit(
const T &
byte,
int bit_idx) {
28 return byte & (1 << bit_idx);
// Clears bit `bit_idx` in the byte that `byte` points to by AND-ing with the
// inverted one-bit mask. (Enclosing signature not visible in this excerpt.)
31 *
byte &= ~(1 << bit_idx);
// Padding, in bytes, added to the byte indices computed by the read and write code
// visible in this file. The reader indexes up to 4 bytes before the byte it targets,
// so adding this padding first keeps those reads at non-negative indices.
33 static const int kPadding = 4;
// Computes ceil(log2(num_symbols)) in double precision, then passes that value
// (converted to size_t) and 1 to common::Max - presumably returning the larger, so the
// result is never below one bit.
// NOTE(review): for num_symbols == 0, log2 yields -infinity and converting that to
// size_t is undefined behaviour - confirm callers always pass at least 1.
38 auto bits = std::ceil(log2(
static_cast<double>(num_symbols)));
39 return common::Max(
static_cast<size_t>(bits),
size_t(1));
// Member initializer: symbol_bits_ takes the value of detail::SymbolBits(num_symbols);
// the constructor body itself is empty.
59 : symbol_bits_(detail::
SymbolBits(num_symbols)) {}
// Number of bits in one byte.
77 constexpr
int kBitsPerByte = 8;
// compressed_size is the ceiling (converted to size_t) of an expression whose operands
// fall outside this excerpt; presumably the packed payload size in bytes - TODO confirm.
78 size_t compressed_size =
static_cast<size_t>(std::ceil(
// Divides (compressed_size + detail::kPadding) by sizeof(unsigned int) and rounds up,
// i.e. counts whole unsigned-int-sized words. The factor this count is multiplied by
// is not visible in this excerpt.
83 std::ceil(
static_cast<double>(compressed_size + detail::kPadding) /
84 static_cast<double>(
sizeof(
unsigned int))) *
91 const int bits_per_byte = 8;
// One iteration per bit of the symbol.
93 for (
size_t i = 0; i < symbol_bits_; i++) {
// Bit (offset + 1) * symbol_bits_ - (i + 1) is the i-th bit counted backwards from the
// last bit of slot `offset`; dividing by 8 gives the byte that holds it.
94 size_t byte_idx = ((offset + 1) * symbol_bits_ - (i + 1)) / bits_per_byte;
// Shift the index by the padding offset.
95 byte_idx += detail::kPadding;
// Evaluates to 7 - (bit position % 8): the bit's index counted from the
// least-significant end of its byte. The subtraction can wrap in unsigned arithmetic,
// but the value modulo 8 is unaffected. The variable receiving this value is not
// visible in this excerpt.
97 ((bits_per_byte + i) - ((offset + 1) * symbol_bits_)) % bits_per_byte;
// Device-only function that writes one symbol through dh::AtomicOrByte calls.
// The parameter list is not visible in this excerpt.
108 __device__
void AtomicWriteSymbol
// Positions of the first and last bit of the symbol stored at slot `offset`.
110 size_t ibit_start = offset * symbol_bits_;
111 size_t ibit_end = (offset + 1) * symbol_bits_ - 1;
// Bytes that contain those two bits.
112 size_t ibyte_start = ibit_start / 8, ibyte_end = ibit_end / 8;
// Shift the symbol left so its lowest bit lines up with position ibit_end inside the
// last byte (positions counted from the most significant bit of each byte).
114 symbol <<= 7 - ibit_end % 8;
// Iterate from the last byte down to the first. The index is signed (ptrdiff_t) so the
// `>=` test can become false when ibyte_start is 0.
115 for (
ptrdiff_t ibyte = ibyte_end; ibyte >=
static_cast<ptrdiff_t>(ibyte_start); --ibyte) {
// Passes the low 8 bits of `symbol` together with byte index `ibyte`, relative to the
// region that starts after the padding. dh::AtomicOrByte is defined elsewhere;
// presumably it ORs the byte in atomically - verify against device_helpers.cuh.
116 dh::AtomicOrByte(
reinterpret_cast<unsigned int*
>(buffer + detail::kPadding),
117 ibyte, symbol & 0xff);
// Templated on the input iterator type; the function signature is not visible in this
// excerpt.
123 template <
typename IterT>
// Count of bits currently buffered in the accumulator `tmp` (declared outside this
// excerpt).
126 size_t stored_bits = 0;
// Flush threshold: once more than 64 - symbol_bits_ bits are buffered, the branch below
// runs before the next symbol is shifted in.
// NOTE(review): the 64 presumably matches the bit width of `tmp` - confirm against its
// declaration.
127 const size_t max_stored_bits = 64 - symbol_bits_;
// Buffer indexing starts at the padding offset.
128 size_t buffer_position = detail::kPadding;
129 const size_t num_symbols = input_end - input_begin;
130 for (
size_t i = 0; i < num_symbols; i++) {
131 typename std::iterator_traits<IterT>::value_type symbol = input_begin[i];
132 if (stored_bits > max_stored_bits) {
// Number of complete bytes held in the accumulator.
134 size_t tmp_bytes = stored_bits / 8;
135 for (
size_t j = 0; j < tmp_bytes; j++) {
// Shifts the accumulator so the j-th most significant buffered byte sits in the low
// 8 bits; the start of this statement is not visible in this excerpt.
137 tmp >> (stored_bits - (j + 1) * 8));
// After removing the whole bytes, stored_bits is a remainder below 8.
140 stored_bits -= tmp_bytes * 8;
// Keep only those remaining low bits of the accumulator.
141 tmp &= (1 << stored_bits) - 1;
// Make room for the next symbol in the low bits of the accumulator.
144 tmp <<= symbol_bits_;
146 stored_bits += symbol_bits_;
// Rounds the buffered bit count up to whole bytes, so a trailing partial byte is
// counted too. The variable receiving this value is not visible here; presumably
// the tmp_bytes used by the loop below - TODO confirm.
151 static_cast<int>(std::ceil(
static_cast<float>(stored_bits) / 8));
152 for (
int j = 0; j < tmp_bytes; j++) {
// Negative when the j-th byte is a trailing partial byte, which is why two separate
// assignments to the buffer follow.
153 int shift_bits =
static_cast<int>(stored_bits) - (j + 1) * 8;
154 if (shift_bits >= 0) {
155 buffer[buffer_position] =
158 buffer[buffer_position] =
// Templated on T, the type the extracted symbol is cast to on return.
174 template <
typename T>
// Number of bits per symbol (used below as the mask width); defaults to 0.
186 size_t symbol_bits_ {0};
197 const int bits_per_byte = 8;
// Position of the last bit of the symbol at slot offset_.
198 size_t start_bit_idx = ((offset_ + 1) * symbol_bits_ - 1);
// Byte that holds that bit, then shifted by the padding offset.
199 size_t start_byte_idx = start_bit_idx / bits_per_byte;
200 start_byte_idx += detail::kPadding;
// Assemble the five consecutive bytes ending at start_byte_idx into one 64-bit value,
// earliest byte in the highest position. Because kPadding (4) was added above,
// start_byte_idx - 4 cannot go below index 0.
203 uint64_t tmp =
static_cast<uint64_t
>(buffer_[start_byte_idx - 4]) << 32 |
204 static_cast<uint64_t
>(buffer_[start_byte_idx - 3]) << 24 |
205 static_cast<uint64_t
>(buffer_[start_byte_idx - 2]) << 16 |
206 static_cast<uint64_t
>(buffer_[start_byte_idx - 1]) << 8 |
207 buffer_[start_byte_idx];
// Evaluates to 7 - (start_bit_idx % 8): the number of bits in the last byte that lie
// below the symbol's final bit. The variable receiving this value and how it is
// applied to tmp are not visible in this excerpt - presumably a right shift.
209 (bits_per_byte - ((offset_ + 1) * symbol_bits_)) % bits_per_byte;
// Mask with the low symbol_bits_ bits set.
212 uint64_t mask = (
static_cast<uint64_t
>(1) << symbol_bits_) - 1;
// Return the masked value converted to T.
213 return static_cast<T
>(tmp & mask);
// Adds idx to the offset_ field of `offset` (enclosing function not visible here).
218 offset.offset_ += idx;