Skip to content

Commit e7a885d

Browse files
micah-whitepitrou
andauthored
GH-35521: [C++] Hash null bitmap only if null count is 0 (#35522)
### Are these changes tested? I am new to contributing and am having a hard time creating a good test for this. The steps to reproduce this bug originally are too complicated for a simple test. I included my attempt at making a good test in the PR, but some help would be nice. --> ### Are there any user-facing changes? No. **This PR contains a "Critical Fix".** * Closes: #35521 Lead-authored-by: micah-white <micahwhitecs@gmail.com> Co-authored-by: Antoine Pitrou <antoine@python.org> Signed-off-by: Antoine Pitrou <antoine@python.org>
1 parent 401ae19 commit e7a885d

2 files changed

Lines changed: 23 additions & 3 deletions

File tree

cpp/src/arrow/scalar.cc

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -153,9 +153,10 @@ struct ScalarHashImpl {
153153

154154
Status ArrayHash(const ArrayData& a) {
155155
RETURN_NOT_OK(StdHash(a.length) & StdHash(a.GetNullCount()));
156-
if (a.buffers[0] != nullptr) {
156+
if (a.GetNullCount() != 0 && a.buffers[0] != nullptr) {
157157
// We can't visit values without unboxing the whole array, so only hash
158-
// the null bitmap for now.
158+
// the null bitmap for now. Only hash the null bitmap if the null count
159+
// is not 0 to ensure hash consistency.
159160
RETURN_NOT_OK(BufferHash(*a.buffers[0]));
160161
}
161162
for (const auto& child : a.child_data) {

cpp/src/arrow/scalar_test.cc

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1064,7 +1064,6 @@ class TestListScalar : public ::testing::Test {
10641064
using ScalarType = typename TypeTraits<T>::ScalarType;
10651065

10661066
void SetUp() {
1067-
// type_ = std::make_shared<T>(int16());
10681067
type_ = MakeListType<T>(int16(), 3);
10691068
value_ = ArrayFromJSON(int16(), "[1, 2, null]");
10701069
}
@@ -1106,6 +1105,24 @@ class TestListScalar : public ::testing::Test {
11061105
ASSERT_RAISES(Invalid, scalar.ValidateFull());
11071106
}
11081107

1108+
void TestHashing() {
1109+
// GH-35521: the hash value of a non-null list scalar should not
1110+
// depend on the presence or absence of a null bitmap in the underlying
1111+
// list values.
1112+
ScalarType empty_bitmap_scalar(ArrayFromJSON(int16(), "[1, 2, 3]"));
1113+
ASSERT_OK(empty_bitmap_scalar.ValidateFull());
1114+
// Underlying list array doesn't have a null bitmap
1115+
ASSERT_EQ(empty_bitmap_scalar.value->data()->buffers[0], nullptr);
1116+
1117+
auto list_array = ArrayFromJSON(type_, "[[1, 2, 3], [4, 5, null]]");
1118+
ASSERT_OK_AND_ASSIGN(auto set_bitmap_scalar_uncasted, list_array->GetScalar(0));
1119+
auto set_bitmap_scalar = checked_pointer_cast<ScalarType>(set_bitmap_scalar_uncasted);
1120+
// Underlying list array has a null bitmap
1121+
ASSERT_NE(set_bitmap_scalar->value->data()->buffers[0], nullptr);
1122+
// ... yet it's hashing equal to the other scalar
1123+
ASSERT_EQ(empty_bitmap_scalar.hash(), set_bitmap_scalar->hash());
1124+
}
1125+
11091126
protected:
11101127
std::shared_ptr<DataType> type_;
11111128
std::shared_ptr<Array> value_;
@@ -1119,6 +1136,8 @@ TYPED_TEST(TestListScalar, Basics) { this->TestBasics(); }
11191136

11201137
TYPED_TEST(TestListScalar, ValidateErrors) { this->TestValidateErrors(); }
11211138

1139+
TYPED_TEST(TestListScalar, TestHashing) { this->TestHashing(); }
1140+
11221141
TEST(TestFixedSizeListScalar, ValidateErrors) {
11231142
const auto ty = fixed_size_list(int16(), 3);
11241143
FixedSizeListScalar scalar(ArrayFromJSON(int16(), "[1, 2, 5]"), ty);

0 commit comments

Comments
 (0)