Skip to content

Commit 8cc71ab

Browse files
authored
GH-35344: [C++][Format] Implementation of the LIST_VIEW and LARGE_LIST_VIEW array formats (#35345)
### Rationale for this change
Mailing list discussion: https://lists.apache.org/thread/r28rw5n39jwtvn08oljl09d4q2c1ysvb
### What changes are included in this PR?
Initial implementation of the new format in C++.
### Are these changes tested?
Unit tests are written in every commit that adds new functionality. More functionality needs to be implemented before the (required) Integration Tests can be written.
### Are there any user-facing changes?
A new array format. It should have no impact on users who don't use it.
* Closes: #35344
Authored-by: Felipe Oliveira Carvalho <felipekde@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
1 parent 8627921 commit 8cc71ab

63 files changed

Lines changed: 4401 additions & 504 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

cpp/src/arrow/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,7 @@ set(ARROW_SRCS
229229
util/hashing.cc
230230
util/int_util.cc
231231
util/io_util.cc
232+
util/list_util.cc
232233
util/logging.cc
233234
util/key_value_metadata.cc
234235
util/memory.cc
@@ -790,6 +791,7 @@ add_arrow_test(array_test
790791
array/array_binary_test.cc
791792
array/array_dict_test.cc
792793
array/array_list_test.cc
794+
array/array_list_view_test.cc
793795
array/array_run_end_test.cc
794796
array/array_struct_test.cc
795797
array/array_union_test.cc

cpp/src/arrow/array/array_base.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ struct ScalarFromArraySlotImpl {
9595
Status Visit(const MonthDayNanoIntervalArray& a) { return Finish(a.Value(index_)); }
9696

9797
template <typename T>
98-
Status Visit(const BaseListArray<T>& a) {
98+
Status Visit(const VarLengthListLikeArray<T>& a) {
9999
return Finish(a.value_slice(index_));
100100
}
101101

cpp/src/arrow/array/array_list_test.cc

Lines changed: 393 additions & 53 deletions
Large diffs are not rendered by default.
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#include <gtest/gtest.h>
19+
20+
#include "arrow/array/array_nested.h"
21+
#include "arrow/array/util.h"
22+
#include "arrow/pretty_print.h"
23+
#include "arrow/testing/gtest_util.h"
24+
#include "arrow/type_fwd.h"
25+
#include "arrow/util/checked_cast.h"
26+
27+
namespace arrow {
28+
29+
using internal::checked_cast;
30+
31+
// ----------------------------------------------------------------------
32+
// List-view array tests
33+
34+
namespace {
35+
36+
// Test fixture for ListViewArray tests: holds three small value arrays and
// helpers that parse JSON text into int32 offsets/sizes arrays.
class TestListViewArray : public ::testing::Test {
37+
public:
38+
// Value arrays used as list-view children; assigned in SetUp().
std::shared_ptr<Array> string_values;
39+
std::shared_ptr<Array> int32_values;
40+
std::shared_ptr<Array> int16_values;
41+
42+
// Builds the three value arrays (three elements each) from JSON literals.
// The string array ends with a null element.
void SetUp() override {
43+
string_values = ArrayFromJSON(utf8(), R"(["Hello", "World", null])");
44+
int32_values = ArrayFromJSON(int32(), "[1, 20, 3]");
45+
int16_values = ArrayFromJSON(int16(), "[10, 2, 30]");
46+
}
47+
48+
// Parses `json` into an int32 array intended to be passed as offsets.
static std::shared_ptr<Array> Offsets(std::string_view json) {
49+
return ArrayFromJSON(int32(), json);
50+
}
51+
52+
// Parses `json` into an int32 array intended to be passed as sizes.
// Same implementation as Offsets(); the separate name documents intent at call sites.
static std::shared_ptr<Array> Sizes(std::string_view json) {
53+
return ArrayFromJSON(int32(), json);
54+
}
55+
};
56+
57+
} // namespace
58+
59+
// Checks that wrapping a ListViewArray's ArrayData with MakeArray() yields an
// equal array that shares the same ArrayData and has dynamic type ListViewArray.
TEST_F(TestListViewArray, MakeArray) {
60+
// Four (offset, size) pairs over the 3-element string array; the first two
// pairs both start at offset 0, so their value ranges overlap.
ASSERT_OK_AND_ASSIGN(auto list_view_array,
61+
ListViewArray::FromArrays(*Offsets("[0, 0, 1, 2]"),
62+
*Sizes("[2, 1, 1, 1]"), *string_values));
63+
auto array_data = list_view_array->data();
64+
auto new_array = MakeArray(array_data);
65+
ASSERT_ARRAYS_EQUAL(*new_array, *list_view_array);
66+
// Should be the exact same ArrayData object
67+
ASSERT_EQ(new_array->data(), array_data);
68+
// MakeArray must have produced a ListViewArray (the cast returns non-null).
ASSERT_NE(std::dynamic_pointer_cast<ListViewArray>(new_array), NULLPTR);
69+
}
70+
71+
// Checks the basic properties of a ListViewArray built by FromArrays() from
// separate offsets, sizes and values arrays.
TEST_F(TestListViewArray, FromOffsetsAndSizes) {
72+
std::shared_ptr<ListViewArray> list_view_array;
73+
74+
// The last entry has a null size and an offset (1000) far beyond the
// 3-element values array; construction is still expected to succeed.
ASSERT_OK_AND_ASSIGN(list_view_array, ListViewArray::FromArrays(
75+
*Offsets("[0, 0, 1, 1000]"),
76+
*Sizes("[2, 1, 1, null]"), *int32_values));
77+
ASSERT_EQ(list_view_array->length(), 4);
78+
ASSERT_ARRAYS_EQUAL(*list_view_array->values(), *int32_values);
79+
ASSERT_EQ(list_view_array->offset(), 0);
80+
// Exactly one null is expected -- presumably derived from the null size entry.
ASSERT_EQ(list_view_array->data()->GetNullCount(), 1);
81+
// NOTE(review): three buffers presumably means validity, offsets and sizes --
// confirm against the ListView layout specification.
ASSERT_EQ(list_view_array->data()->buffers.size(), 3);
82+
}
83+
84+
} // namespace arrow

0 commit comments

Comments
 (0)