@@ -657,13 +657,14 @@ auto RleBitPackedParser::PeekImpl(Handler&& handler) const
657657 const auto header_bytes = bit_util::ParseLeadingLEB128 (data_, kMaxSize , &run_len_type);
658658
659659 if (ARROW_PREDICT_FALSE (header_bytes == 0 )) {
660- // Malfomrmed LEB128 data
660+ // Malformed LEB128 data
661661 return {0 , ControlFlow::Break};
662662 }
663663
664664 const bool is_bit_packed = run_len_type & 1 ;
665665 const uint32_t count = run_len_type >> 1 ;
666666 if (is_bit_packed) {
667+ // Bit-packed run
667668 constexpr auto kMaxCount = bit_util::CeilDiv (internal::max_size_for_v<rle_size_t >, 8 );
668669 if (ARROW_PREDICT_FALSE (count == 0 || count > kMaxCount )) {
669670 // Illegal number of encoded values
@@ -672,17 +673,21 @@ auto RleBitPackedParser::PeekImpl(Handler&& handler) const
672673
673674 ARROW_DCHECK_LT (static_cast <uint64_t >(count) * 8 ,
674675 internal::max_size_for_v<rle_size_t >);
676+ // `count` is the number of groups of 8 values, so count * value_bit_width_ is already a size in bytes
677+ const auto bytes_read = header_bytes + static_cast <int64_t >(count) * value_bit_width_;
678+ if (ARROW_PREDICT_FALSE (bytes_read > data_size_)) {
679+ // Bit-packed run would overflow data buffer
680+ return {0 , ControlFlow::Break};
681+ }
675682 const auto values_count = static_cast <rle_size_t >(count * 8 );
676- // Count Already divided by 8
677- const auto bytes_read =
678- header_bytes + static_cast <rle_size_t >(count) * value_bit_width_;
679683
680684 auto control = handler.OnBitPackedRun (
681685 BitPackedRun (data_ + header_bytes, values_count, value_bit_width_));
682686
683687 return {bytes_read, control};
684688 }
685689
690+ // RLE run
686691 if (ARROW_PREDICT_FALSE (count == 0 )) {
687692 // Illegal number of encoded values
688693 return {0 , ControlFlow::Break};
@@ -1079,7 +1084,6 @@ auto RleBitPackedDecoder<T>::GetSpaced(Converter converter,
10791084 // There may be remaining null if they are not greedily filled by either decoder calls
10801085 check_and_handle_fully_null_remaining ();
10811086
1082- ARROW_DCHECK (batch.is_done () || exhausted ());
10831087 return batch.total_read ();
10841088}
10851089
0 commit comments