Skip to content

Commit be8b49a

Browse files
committed
GH-49266: [C++][Parquet] Optimize delta bit-packed decoding when bit-width = 0
1 parent cbe2618 commit be8b49a

File tree

3 files changed

+72
-42
lines changed

3 files changed

+72
-42
lines changed

cpp/src/parquet/decoder.cc

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1619,16 +1619,27 @@ class DeltaBitPackDecoder : public TypedDecoderImpl<DType> {
16191619

16201620
int values_decode = std::min(values_remaining_current_mini_block_,
16211621
static_cast<uint32_t>(max_values - i));
1622-
if (decoder_->GetBatch(delta_bit_width_, buffer + i, values_decode) !=
1623-
values_decode) {
1624-
ParquetException::EofException();
1625-
}
1626-
for (int j = 0; j < values_decode; ++j) {
1627-
// Addition between min_delta, packed int and last_value should be treated as
1628-
// unsigned addition. Overflow is as expected.
1629-
buffer[i + j] = static_cast<UT>(min_delta_) + static_cast<UT>(buffer[i + j]) +
1630-
static_cast<UT>(last_value_);
1631-
last_value_ = buffer[i + j];
1622+
if (delta_bit_width_ == 0) {
1623+
// Fast path that avoids a back-to-back dependency between two consecutive
1624+
// computations: we know all deltas decode to zero. We actually don't
1625+
// even need to decode them.
1626+
for (int j = 0; j < values_decode; ++j) {
1627+
buffer[i + j] = static_cast<UT>(last_value_) +
1628+
static_cast<UT>(j + 1) * static_cast<UT>(min_delta_);
1629+
}
1630+
last_value_ += static_cast<UT>(values_decode) * static_cast<UT>(min_delta_);
1631+
} else {
1632+
if (decoder_->GetBatch(delta_bit_width_, buffer + i, values_decode) !=
1633+
values_decode) {
1634+
ParquetException::EofException();
1635+
}
1636+
for (int j = 0; j < values_decode; ++j) {
1637+
// Addition between min_delta, packed int and last_value should be treated as
1638+
// unsigned addition. Overflow is as expected.
1639+
buffer[i + j] = static_cast<UT>(min_delta_) + static_cast<UT>(buffer[i + j]) +
1640+
static_cast<UT>(last_value_);
1641+
last_value_ = buffer[i + j];
1642+
}
16321643
}
16331644
values_remaining_current_mini_block_ -= values_decode;
16341645
i += values_decode;

cpp/src/parquet/encoder.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1036,6 +1036,8 @@ template <typename DType>
10361036
class DeltaBitPackEncoder : public EncoderImpl, virtual public TypedEncoder<DType> {
10371037
// Maximum possible header size
10381038
static constexpr uint32_t kMaxPageHeaderWriterSize = 32;
1039+
// If these constants are changed, then the corresponding values in
1040+
// TestDeltaBitPackEncoding (in `encoding_test.cc`) should be updated too.
10391041
static constexpr uint32_t kValuesPerBlock =
10401042
std::is_same_v<int32_t, typename DType::c_type> ? 128 : 256;
10411043
static constexpr uint32_t kMiniBlocksPerBlock = 4;

cpp/src/parquet/encoding_test.cc

Lines changed: 49 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include <cstdint>
2222
#include <cstring>
2323
#include <limits>
24+
#include <span>
2425
#include <utility>
2526
#include <vector>
2627

@@ -119,14 +120,14 @@ TEST(VectorBooleanTest, TestEncodeIntDecode) {
119120
}
120121

121122
template <typename T>
122-
void VerifyResults(T* result, T* expected, int num_values) {
123+
void VerifyResults(const T* result, const T* expected, int num_values) {
123124
for (int i = 0; i < num_values; ++i) {
124125
ASSERT_EQ(expected[i], result[i]) << i;
125126
}
126127
}
127128

128129
template <typename T>
129-
void VerifyResultsSpaced(T* result, T* expected, int num_values,
130+
void VerifyResultsSpaced(const T* result, const T* expected, int num_values,
130131
const uint8_t* valid_bits, int64_t valid_bits_offset) {
131132
for (auto i = 0; i < num_values; ++i) {
132133
if (bit_util::GetBit(valid_bits, valid_bits_offset + i)) {
@@ -136,14 +137,14 @@ void VerifyResultsSpaced(T* result, T* expected, int num_values,
136137
}
137138

138139
template <>
139-
void VerifyResults<FLBA>(FLBA* result, FLBA* expected, int num_values) {
140+
void VerifyResults<FLBA>(const FLBA* result, const FLBA* expected, int num_values) {
140141
for (int i = 0; i < num_values; ++i) {
141142
ASSERT_EQ(0, memcmp(expected[i].ptr, result[i].ptr, kGenerateDataFLBALength)) << i;
142143
}
143144
}
144145

145146
template <>
146-
void VerifyResultsSpaced<FLBA>(FLBA* result, FLBA* expected, int num_values,
147+
void VerifyResultsSpaced<FLBA>(const FLBA* result, const FLBA* expected, int num_values,
147148
const uint8_t* valid_bits, int64_t valid_bits_offset) {
148149
for (auto i = 0; i < num_values; ++i) {
149150
if (bit_util::GetBit(valid_bits, valid_bits_offset + i)) {
@@ -1735,35 +1736,45 @@ class TestDeltaBitPackEncoding : public TestEncodingBase<Type> {
17351736
CheckRoundtripSpaced(valid_bits, valid_bits_offset);
17361737
}
17371738

1738-
void CheckDecoding() {
1739+
void CheckDecoding() { CheckDecoding(std::span(draws_, num_values_)); }
1740+
1741+
void CheckDecoding(std::span<const c_type> expected_values) {
1742+
const auto num_values = static_cast<int>(expected_values.size());
17391743
auto decoder = MakeTypedDecoder<Type>(Encoding::DELTA_BINARY_PACKED, descr_.get());
17401744
auto read_batch_sizes = kReadBatchSizes;
1741-
read_batch_sizes.push_back(num_values_);
1745+
read_batch_sizes.push_back(num_values);
17421746
// Exercise different batch sizes
17431747
for (const int read_batch_size : read_batch_sizes) {
1744-
decoder->SetData(num_values_, encode_buffer_->data(),
1748+
decoder->SetData(num_values, encode_buffer_->data(),
17451749
static_cast<int>(encode_buffer_->size()));
17461750

1751+
std::vector<c_type> decoded_values(num_values);
17471752
int values_decoded = 0;
1748-
while (values_decoded < num_values_) {
1749-
values_decoded += decoder->Decode(decode_buf_ + values_decoded, read_batch_size);
1753+
while (values_decoded < num_values) {
1754+
values_decoded +=
1755+
decoder->Decode(decoded_values.data() + values_decoded, read_batch_size);
17501756
}
1751-
ASSERT_EQ(num_values_, values_decoded);
1752-
ASSERT_NO_FATAL_FAILURE(VerifyResults<c_type>(decode_buf_, draws_, num_values_));
1757+
ASSERT_EQ(num_values, values_decoded);
1758+
ASSERT_NO_FATAL_FAILURE(VerifyResults<c_type>(decoded_values.data(),
1759+
expected_values.data(), num_values));
17531760
}
17541761
}
17551762

1756-
void CheckRoundtrip() override {
1763+
void CheckRoundtripWithValues(std::span<const c_type> values) {
17571764
auto encoder = MakeTypedEncoder<Type>(Encoding::DELTA_BINARY_PACKED,
17581765
/*use_dictionary=*/false, descr_.get());
17591766
// Encode a number of times to exercise the flush logic
17601767
for (size_t i = 0; i < kNumRoundTrips; ++i) {
1761-
encoder->Put(draws_, num_values_);
1768+
encoder->Put(values.data(), static_cast<int>(values.size()));
17621769
encode_buffer_ = encoder->FlushValues();
1763-
CheckDecoding();
1770+
CheckDecoding(values);
17641771
}
17651772
}
17661773

1774+
void CheckRoundtrip() override {
1775+
CheckRoundtripWithValues(std::span(draws_, num_values_));
1776+
}
1777+
17671778
void CheckRoundtripSpaced(const uint8_t* valid_bits,
17681779
int64_t valid_bits_offset) override {
17691780
auto encoder = MakeTypedEncoder<Type>(Encoding::DELTA_BINARY_PACKED,
@@ -1920,24 +1931,7 @@ TYPED_TEST(TestDeltaBitPackEncoding, DeltaBitPackedWrapping) {
19201931
1,
19211932
-1,
19221933
1};
1923-
const int num_values = static_cast<int>(int_values.size());
1924-
1925-
const auto encoder = MakeTypedEncoder<TypeParam>(
1926-
Encoding::DELTA_BINARY_PACKED, /*use_dictionary=*/false, this->descr_.get());
1927-
encoder->Put(int_values, num_values);
1928-
const auto encoded = encoder->FlushValues();
1929-
1930-
const auto decoder =
1931-
MakeTypedDecoder<TypeParam>(Encoding::DELTA_BINARY_PACKED, this->descr_.get());
1932-
1933-
std::vector<T> decoded(num_values);
1934-
decoder->SetData(num_values, encoded->data(), static_cast<int>(encoded->size()));
1935-
1936-
const int values_decoded = decoder->Decode(decoded.data(), num_values);
1937-
1938-
ASSERT_EQ(num_values, values_decoded);
1939-
ASSERT_NO_FATAL_FAILURE(
1940-
VerifyResults<T>(decoded.data(), int_values.data(), num_values));
1934+
this->CheckRoundtripWithValues(int_values);
19411935
}
19421936

19431937
// Test that the DELTA_BINARY_PACKED encoding does not use more bits to encode than
@@ -1967,6 +1961,29 @@ TYPED_TEST(TestDeltaBitPackEncoding, DeltaBitPackedSize) {
19671961
ASSERT_EQ(encoded->size(), encoded_size);
19681962
}
19691963

1964+
TYPED_TEST(TestDeltaBitPackEncoding, ZeroDeltaBitWidth) {
1965+
// Exercise ranges of zero deltas interspersed between ranges of non-zero deltas.
1966+
// This checks that the zero bit-width optimization in GH-49266 doesn't mess
1967+
// decoder state.
1968+
using T = typename TypeParam::c_type;
1969+
1970+
// At least the size of a block
1971+
constexpr int kRangeSize = 256;
1972+
1973+
std::vector<T> int_values;
1974+
for (int i = 0; i < kRangeSize; ++i) {
1975+
int_values.push_back((i * 7) % 11);
1976+
}
1977+
// Range of equal values, should emit zero-width deltas
1978+
for (int i = 0; i < kRangeSize * 2; ++i) {
1979+
int_values.push_back(42);
1980+
}
1981+
for (int i = 0; i < kRangeSize; ++i) {
1982+
int_values.push_back((i * 5) % 7);
1983+
}
1984+
this->CheckRoundtripWithValues(int_values);
1985+
}
1986+
19701987
// ----------------------------------------------------------------------
19711988
// Rle for Boolean encode/decode tests.
19721989

0 commit comments

Comments
 (0)