Skip to content

Commit 47b2fbd

Browse files
authored
GH-43046: [C++] Fix avx2 gather rows more than 2^31 issue in CompareColumnsToRows (#43065)
### Rationale for this change See #43046. ### What changes are included in this PR? Use unsigned offset safe gather introduced in #42188 which is to fix similar issues. ### Are these changes tested? Yes. ### Are there any user-facing changes? None. * GitHub Issue: #43046 Lead-authored-by: Ruoxi Sun <zanmato1984@gmail.com> Co-authored-by: Rossi Sun <zanmato1984@gmail.com> Signed-off-by: Antoine Pitrou <antoine@python.org>
1 parent 3b7ad9d commit 47b2fbd

File tree

2 files changed

+36
-37
lines changed

2 files changed

+36
-37
lines changed

cpp/src/arrow/compute/row/compare_internal_avx2.cc

Lines changed: 35 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,40 @@ uint32_t KeyCompare::NullUpdateColumnToRowImp_avx2(
180180
}
181181
}
182182

183+
namespace {
184+
185+
// Intrinsics `_mm256_i32gather_epi32/64` treat the `vindex` as signed integer, and we
186+
// are using `uint32_t` to represent the offset, in range of [0, 4G), within the row
187+
// table. When the offset is larger than `0x80000000` (2GB), those intrinsics will treat
188+
// it as negative offset and gather the data from undesired address. To avoid this issue,
189+
// we normalize the addresses by translating `base` `0x80000000` higher, and `offset`
190+
// `0x80000000` lower. This way, the offset is always in range of [-2G, 2G) and those
191+
// intrinsics are safe.
192+
193+
constexpr uint64_t kTwoGB = 0x80000000ull;
194+
195+
template <uint32_t kScale>
196+
inline __m256i UnsignedOffsetSafeGather32(int const* base, __m256i offset) {
197+
int const* normalized_base = base + kTwoGB / sizeof(int);
198+
__m256i normalized_offset =
199+
_mm256_sub_epi32(offset, _mm256_set1_epi32(static_cast<int>(kTwoGB / kScale)));
200+
return _mm256_i32gather_epi32(normalized_base, normalized_offset,
201+
static_cast<int>(kScale));
202+
}
203+
204+
template <uint32_t kScale>
205+
inline __m256i UnsignedOffsetSafeGather64(arrow::util::int64_for_gather_t const* base,
206+
__m128i offset) {
207+
arrow::util::int64_for_gather_t const* normalized_base =
208+
base + kTwoGB / sizeof(arrow::util::int64_for_gather_t);
209+
__m128i normalized_offset =
210+
_mm_sub_epi32(offset, _mm_set1_epi32(static_cast<int>(kTwoGB / kScale)));
211+
return _mm256_i32gather_epi64(normalized_base, normalized_offset,
212+
static_cast<int>(kScale));
213+
}
214+
215+
} // namespace
216+
183217
template <bool use_selection, class COMPARE8_FN>
184218
uint32_t KeyCompare::CompareBinaryColumnToRowHelper_avx2(
185219
uint32_t offset_within_row, uint32_t num_rows_to_compare,
@@ -236,10 +270,8 @@ uint32_t KeyCompare::CompareBinaryColumnToRowHelper_avx2(
236270
irow_right =
237271
_mm256_loadu_si256(reinterpret_cast<const __m256i*>(left_to_right_map) + i);
238272
}
239-
// TODO: Need to test if this gather is OK when irow_right is larger than
240-
// 0x80000000u.
241273
__m256i offset_right =
242-
_mm256_i32gather_epi32((const int*)offsets_right, irow_right, 4);
274+
UnsignedOffsetSafeGather32<4>((int const*)offsets_right, irow_right);
243275
offset_right = _mm256_add_epi32(offset_right, _mm256_set1_epi32(offset_within_row));
244276

245277
reinterpret_cast<uint64_t*>(match_bytevector)[i] =
@@ -253,40 +285,6 @@ uint32_t KeyCompare::CompareBinaryColumnToRowHelper_avx2(
253285
}
254286
}
255287

256-
namespace {
257-
258-
// Intrinsics `_mm256_i32gather_epi32/64` treat the `vindex` as signed integer, and we
259-
// are using `uint32_t` to represent the offset, in range of [0, 4G), within the row
260-
// table. When the offset is larger than `0x80000000` (2GB), those intrinsics will treat
261-
// it as negative offset and gather the data from undesired address. To avoid this issue,
262-
// we normalize the addresses by translating `base` `0x80000000` higher, and `offset`
263-
// `0x80000000` lower. This way, the offset is always in range of [-2G, 2G) and those
264-
// intrinsics are safe.
265-
266-
constexpr uint64_t kTwoGB = 0x80000000ull;
267-
268-
template <uint32_t kScale>
269-
inline __m256i UnsignedOffsetSafeGather32(int const* base, __m256i offset) {
270-
int const* normalized_base = base + kTwoGB / sizeof(int);
271-
__m256i normalized_offset =
272-
_mm256_sub_epi32(offset, _mm256_set1_epi32(static_cast<int>(kTwoGB / kScale)));
273-
return _mm256_i32gather_epi32(normalized_base, normalized_offset,
274-
static_cast<int>(kScale));
275-
}
276-
277-
template <uint32_t kScale>
278-
inline __m256i UnsignedOffsetSafeGather64(arrow::util::int64_for_gather_t const* base,
279-
__m128i offset) {
280-
arrow::util::int64_for_gather_t const* normalized_base =
281-
base + kTwoGB / sizeof(arrow::util::int64_for_gather_t);
282-
__m128i normalized_offset =
283-
_mm_sub_epi32(offset, _mm_set1_epi32(static_cast<int>(kTwoGB / kScale)));
284-
return _mm256_i32gather_epi64(normalized_base, normalized_offset,
285-
static_cast<int>(kScale));
286-
}
287-
288-
} // namespace
289-
290288
template <int column_width>
291289
inline uint64_t CompareSelected8_avx2(const uint8_t* left_base, const uint8_t* right_base,
292290
__m256i irow_left, __m256i offset_right,

cpp/src/arrow/compute/row/row_internal.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,7 @@ Status RowTableImpl::AppendSelectionFrom(const RowTableImpl& from,
325325
// Varying-length rows
326326
auto from_offsets = reinterpret_cast<const uint32_t*>(from.offsets_->data());
327327
auto to_offsets = reinterpret_cast<uint32_t*>(offsets_->mutable_data());
328+
// TODO(GH-43202): The following two variables are possibly overflowing.
328329
uint32_t total_length = to_offsets[num_rows_];
329330
uint32_t total_length_to_append = 0;
330331
for (uint32_t i = 0; i < num_rows_to_append; ++i) {

0 commit comments

Comments
 (0)