|
37 | 37 | #include "arrow/array/builder_binary.h" |
38 | 38 | #include "arrow/array/builder_decimal.h" |
39 | 39 | #include "arrow/array/builder_dict.h" |
| 40 | +#include "arrow/array/builder_run_end.h" |
40 | 41 | #include "arrow/array/builder_time.h" |
41 | 42 | #include "arrow/array/data.h" |
42 | 43 | #include "arrow/array/util.h" |
@@ -83,15 +84,48 @@ TEST_F(TestArray, TestNullCount) { |
83 | 84 | auto data = std::make_shared<Buffer>(nullptr, 0); |
84 | 85 | auto null_bitmap = std::make_shared<Buffer>(nullptr, 0); |
85 | 86 |
|
86 | | - std::unique_ptr<Int32Array> arr(new Int32Array(100, data, null_bitmap, 10)); |
| 87 | + std::shared_ptr<Int32Array> arr(new Int32Array(100, data, null_bitmap, 10)); |
| 88 | + ASSERT_EQ(10, arr->ComputeLogicalNullCount()); |
87 | 89 | ASSERT_EQ(10, arr->null_count()); |
| 90 | + ASSERT_TRUE(arr->data()->MayHaveNulls()); |
| 91 | + ASSERT_TRUE(arr->data()->MayHaveLogicalNulls()); |
88 | 92 |
|
89 | | - std::unique_ptr<Int32Array> arr_no_nulls(new Int32Array(100, data)); |
| 93 | + std::shared_ptr<Int32Array> arr_no_nulls(new Int32Array(100, data)); |
| 94 | + ASSERT_EQ(0, arr_no_nulls->ComputeLogicalNullCount()); |
90 | 95 | ASSERT_EQ(0, arr_no_nulls->null_count()); |
| 96 | + ASSERT_FALSE(arr_no_nulls->data()->MayHaveNulls()); |
| 97 | + ASSERT_FALSE(arr_no_nulls->data()->MayHaveLogicalNulls()); |
91 | 98 |
|
92 | | - std::unique_ptr<Int32Array> arr_default_null_count( |
| 99 | + std::shared_ptr<Int32Array> arr_default_null_count( |
93 | 100 | new Int32Array(100, data, null_bitmap)); |
94 | 101 | ASSERT_EQ(kUnknownNullCount, arr_default_null_count->data()->null_count); |
| 102 | + ASSERT_TRUE(arr_default_null_count->data()->MayHaveNulls()); |
| 103 | + ASSERT_TRUE(arr_default_null_count->data()->MayHaveLogicalNulls()); |
| 104 | + |
| 105 | + RunEndEncodedBuilder ree_builder(pool_, std::make_shared<Int32Builder>(pool_), |
| 106 | + std::make_shared<Int32Builder>(pool_), |
| 107 | + run_end_encoded(int32(), int32())); |
| 108 | + ASSERT_OK(ree_builder.AppendScalar(*MakeScalar<int32_t>(2), 2)); |
| 109 | + ASSERT_OK(ree_builder.AppendNull()); |
| 110 | + ASSERT_OK(ree_builder.AppendScalar(*MakeScalar<int32_t>(4), 3)); |
| 111 | + ASSERT_OK(ree_builder.AppendNulls(2)); |
| 112 | + ASSERT_OK(ree_builder.AppendScalar(*MakeScalar<int32_t>(8), 5)); |
| 113 | + ASSERT_OK(ree_builder.AppendNulls(7)); |
| 114 | + ASSERT_OK_AND_ASSIGN(auto ree, ree_builder.Finish()); |
| 115 | + |
| 116 | + ASSERT_EQ(0, ree->null_count()); |
| 117 | + ASSERT_EQ(10, ree->ComputeLogicalNullCount()); |
| 118 | + ASSERT_FALSE(ree->data()->MayHaveNulls()); |
| 119 | + ASSERT_TRUE(ree->data()->MayHaveLogicalNulls()); |
| 120 | + |
| 121 | + ASSERT_OK(ree_builder.AppendScalar(*MakeScalar<int32_t>(2), 2)); |
| 122 | + ASSERT_OK(ree_builder.AppendScalar(*MakeScalar<int32_t>(4), 3)); |
| 123 | + ASSERT_OK(ree_builder.AppendScalar(*MakeScalar<int32_t>(8), 5)); |
| 124 | + ASSERT_OK_AND_ASSIGN(auto ree_no_nulls, ree_builder.Finish()); |
| 125 | + ASSERT_EQ(0, ree_no_nulls->null_count()); |
| 126 | + ASSERT_EQ(0, ree_no_nulls->ComputeLogicalNullCount()); |
| 127 | + ASSERT_FALSE(ree_no_nulls->data()->MayHaveNulls()); |
| 128 | + ASSERT_FALSE(ree_no_nulls->data()->MayHaveLogicalNulls()); |
95 | 129 | } |
96 | 130 |
|
97 | 131 | TEST_F(TestArray, TestSlicePreservesAllNullCount) { |
@@ -377,20 +411,23 @@ TEST_F(TestArray, TestMakeArrayOfNull) { |
377 | 411 | ASSERT_EQ(array->length(), length); |
378 | 412 | if (is_union(type->id())) { |
379 | 413 | ASSERT_EQ(array->null_count(), 0); |
| 414 | + ASSERT_EQ(array->ComputeLogicalNullCount(), length); |
380 | 415 | const auto& union_array = checked_cast<const UnionArray&>(*array); |
381 | 416 | for (int i = 0; i < union_array.num_fields(); ++i) { |
382 | 417 | ASSERT_EQ(union_array.field(i)->null_count(), union_array.field(i)->length()); |
383 | 418 | } |
384 | 419 | } else if (type->id() == Type::RUN_END_ENCODED) { |
385 | 420 | ASSERT_EQ(array->null_count(), 0); |
| 421 | + ASSERT_EQ(array->ComputeLogicalNullCount(), length); |
386 | 422 | const auto& ree_array = checked_cast<const RunEndEncodedArray&>(*array); |
387 | 423 | ASSERT_EQ(ree_array.values()->null_count(), ree_array.values()->length()); |
388 | 424 | } else { |
389 | 425 | ASSERT_EQ(array->null_count(), length); |
390 | | - for (int64_t i = 0; i < length; ++i) { |
391 | | - ASSERT_TRUE(array->IsNull(i)); |
392 | | - ASSERT_FALSE(array->IsValid(i)); |
393 | | - } |
| 426 | + ASSERT_EQ(array->ComputeLogicalNullCount(), length); |
| 427 | + } |
| 428 | + for (int64_t i = 0; i < length; ++i) { |
| 429 | + ASSERT_TRUE(array->IsNull(i)); |
| 430 | + ASSERT_FALSE(array->IsValid(i)); |
394 | 431 | } |
395 | 432 | } |
396 | 433 | } |
@@ -482,35 +519,45 @@ void AssertAppendScalar(MemoryPool* pool, const std::shared_ptr<Scalar>& scalar) |
482 | 519 | std::unique_ptr<arrow::ArrayBuilder> builder; |
483 | 520 | auto null_scalar = MakeNullScalar(scalar->type); |
484 | 521 | ASSERT_OK(MakeBuilderExactIndex(pool, scalar->type, &builder)); |
485 | | - ASSERT_OK(builder->AppendScalar(*scalar)); |
486 | | - ASSERT_OK(builder->AppendScalar(*scalar)); |
487 | | - ASSERT_OK(builder->AppendScalar(*null_scalar)); |
488 | | - ASSERT_OK(builder->AppendScalars({scalar, null_scalar})); |
489 | | - ASSERT_OK(builder->AppendScalar(*scalar, /*n_repeats=*/2)); |
490 | | - ASSERT_OK(builder->AppendScalar(*null_scalar, /*n_repeats=*/2)); |
| 522 | + ASSERT_OK(builder->AppendScalar(*scalar)); // [0] = scalar |
| 523 | + ASSERT_OK(builder->AppendScalar(*scalar)); // [1] = scalar |
| 524 | + ASSERT_OK(builder->AppendScalar(*null_scalar)); // [2] = NULL |
| 525 | + ASSERT_OK(builder->AppendScalars({scalar, null_scalar})); // [3, 4] = {scalar, NULL} |
| 526 | + ASSERT_OK( |
| 527 | + builder->AppendScalar(*scalar, /*n_repeats=*/2)); // [5, 6] = {scalar, scalar} |
| 528 | + ASSERT_OK( |
| 529 | + builder->AppendScalar(*null_scalar, /*n_repeats=*/2)); // [7, 8] = {NULL, NULL} |
491 | 530 |
|
492 | 531 | std::shared_ptr<Array> out; |
493 | 532 | FinishAndCheckPadding(builder.get(), &out); |
494 | 533 | ASSERT_OK(out->ValidateFull()); |
495 | 534 | AssertTypeEqual(scalar->type, out->type()); |
496 | 535 | ASSERT_EQ(out->length(), 9); |
497 | 536 |
|
498 | | - const bool can_check_nulls = internal::HasValidityBitmap(out->type()->id()); |
| 537 | + auto out_type_id = out->type()->id(); |
| 538 | + const bool has_validity = internal::HasValidityBitmap(out_type_id); |
499 | 539 | // For a dictionary builder, the output dictionary won't necessarily be the same |
500 | | - const bool can_check_values = !is_dictionary(out->type()->id()); |
| 540 | + const bool can_check_values = !is_dictionary(out_type_id); |
501 | 541 |
|
502 | | - if (can_check_nulls) { |
| 542 | + if (has_validity) { |
503 | 543 | ASSERT_EQ(out->null_count(), 4); |
| 544 | + } else { |
| 545 | + ASSERT_EQ(out->null_count(), 0); |
| 546 | + } |
| 547 | + if (scalar->is_valid) { |
| 548 | + ASSERT_EQ(out->ComputeLogicalNullCount(), 4); |
| 549 | + } else { |
| 550 | + ASSERT_EQ(out->ComputeLogicalNullCount(), 9); |
504 | 551 | } |
505 | 552 |
|
506 | 553 | for (const auto index : {0, 1, 3, 5, 6}) { |
507 | | - ASSERT_FALSE(out->IsNull(index)); |
| 554 | + ASSERT_NE(out->IsNull(index), scalar->is_valid); |
508 | 555 | ASSERT_OK_AND_ASSIGN(auto scalar_i, out->GetScalar(index)); |
509 | 556 | ASSERT_OK(scalar_i->ValidateFull()); |
510 | 557 | if (can_check_values) AssertScalarsEqual(*scalar, *scalar_i, /*verbose=*/true); |
511 | 558 | } |
512 | 559 | for (const auto index : {2, 4, 7, 8}) { |
513 | | - ASSERT_EQ(out->IsNull(index), can_check_nulls); |
| 560 | + ASSERT_TRUE(out->IsNull(index)); |
514 | 561 | ASSERT_OK_AND_ASSIGN(auto scalar_i, out->GetScalar(index)); |
515 | 562 | ASSERT_OK(scalar_i->ValidateFull()); |
516 | 563 | AssertScalarsEqual(*null_scalar, *scalar_i, /*verbose=*/true); |
|
0 commit comments