@@ -627,7 +627,8 @@ def test_string_binary_from_buffers():
627627 assert copied .null_count == 0
628628
629629
630- @pytest .mark .parametrize ('list_type_factory' , [pa .list_ , pa .large_list ])
630+ @pytest .mark .parametrize ('list_type_factory' , [
631+ pa .list_ , pa .large_list , pa .list_view , pa .large_list_view ])
631632def test_list_from_buffers (list_type_factory ):
632633 ty = list_type_factory (pa .int16 ())
633634 array = pa .array ([[0 , 1 , 2 ], None , [], [3 , 4 , 5 ]], type = ty )
@@ -637,15 +638,15 @@ def test_list_from_buffers(list_type_factory):
637638
638639 with pytest .raises (ValueError ):
639640 # No children
640- pa .Array .from_buffers (ty , 4 , [ None , buffers [1 ] ])
641+ pa .Array .from_buffers (ty , 4 , buffers [: ty . num_buffers ])
641642
642- child = pa .Array .from_buffers (pa .int16 (), 6 , buffers [2 :])
643- copied = pa .Array .from_buffers (ty , 4 , buffers [:2 ], children = [child ])
643+ child = pa .Array .from_buffers (pa .int16 (), 6 , buffers [ty . num_buffers :])
644+ copied = pa .Array .from_buffers (ty , 4 , buffers [:ty . num_buffers ], children = [child ])
644645 assert copied .equals (array )
645646
646647 with pytest .raises (ValueError ):
647648 # too many children
648- pa .Array .from_buffers (ty , 4 , [ None , buffers [1 ] ],
649+ pa .Array .from_buffers (ty , 4 , buffers [: ty . num_buffers ],
649650 children = [child , child ])
650651
651652
@@ -2022,6 +2023,9 @@ def test_cast_identities(ty, values):
20222023 ([[1 , 2 ], [3 ]], pa .list_ (pa .int64 ())),
20232024 ([[4 , 5 ], [6 ]], pa .large_list (pa .int16 ())),
20242025 ([['a' ], None , ['b' , 'c' ]], pa .list_ (pa .string ())),
2026+ ([[1 , 2 ], [3 ]], pa .list_view (pa .int64 ())),
2027+ ([[4 , 5 ], [6 ]], pa .large_list_view (pa .int16 ())),
2028+ ([['a' ], None , ['b' , 'c' ]], pa .list_view (pa .string ())),
20252029 ([(1 , 'a' ), (2 , 'c' ), None ],
20262030 pa .struct ([pa .field ('a' , pa .int64 ()), pa .field ('b' , pa .string ())]))
20272031 ]
@@ -3575,9 +3579,10 @@ def test_run_end_encoded_from_buffers():
35753579 1 , offset , children )
35763580
35773581
3578- @pytest .mark .parametrize (('list_array_type' ),
3579- [pa .ListViewArray , pa .LargeListViewArray ])
3580- def test_list_view_from_arrays (list_array_type ):
3582+ @pytest .mark .parametrize (('list_array_type' , 'list_type_factory' ),
3583+ [(pa .ListViewArray , pa .list_view ),
3584+ (pa .LargeListViewArray , pa .large_list_view )])
3585+ def test_list_view_from_arrays (list_array_type , list_type_factory ):
35813586 # test in order offsets, similar to ListArray representation
35823587 values = [1 , 2 , 3 , 4 , 5 , 6 , None , 7 ]
35833588 offsets = [0 , 2 , 4 , 6 ]
@@ -3589,6 +3594,17 @@ def test_list_view_from_arrays(list_array_type):
35893594 assert array .offsets .to_pylist () == offsets
35903595 assert array .sizes .to_pylist () == sizes
35913596
3597+ # with specified type
3598+ typ = list_type_factory (pa .field ("name" , pa .int64 ()))
3599+ result = list_array_type .from_arrays (offsets , sizes , values , typ )
3600+ assert result .type == typ
3601+ assert result .type .value_field .name == "name"
3602+
3603+ # with mismatching type
3604+ typ = list_type_factory (pa .binary ())
3605+ with pytest .raises (TypeError ):
3606+ list_array_type .from_arrays (offsets , sizes , values , type = typ )
3607+
35923608 # test out of order offsets with overlapping values
35933609 values = [1 , 2 , 3 , 4 ]
35943610 offsets = [2 , 1 , 0 ]
@@ -3635,12 +3651,121 @@ def test_list_view_from_arrays(list_array_type):
36353651 assert array .sizes .to_pylist () == sizes
36363652
36373653
3638- @pytest .mark .parametrize (('list_array_type' ),
3639- [pa .ListViewArray , pa .LargeListViewArray ])
3640- def test_list_view_flatten (list_array_type ):
3654+ @pytest .mark .parametrize (('list_array_type' , 'list_type_factory' ),
3655+ [(pa .ListViewArray , pa .list_view ),
3656+ (pa .LargeListViewArray , pa .large_list_view )])
3657+ def test_list_view_from_arrays_fails (list_array_type , list_type_factory ):
3658+ values = [1 , 2 ]
3659+ offsets = [0 , 1 , None ]
3660+ sizes = [1 , 1 , 0 ]
3661+ mask = pa .array ([False , False , True ])
3662+
3663+ # Passing an explicit mask together with offsets or sizes that contain nulls is ambiguous and is rejected
3664+ with pytest .raises (pa .lib .ArrowInvalid ):
3665+ list_array_type .from_arrays (offsets , sizes , values , mask = mask )
3666+
3667+ offsets = [0 , 1 , 1 ]
3668+ array = list_array_type .from_arrays (offsets , sizes , values , mask = mask )
3669+ array_slice = array [1 :]
3670+
3671+ # Offsets and sizes taken from a sliced array are rejected when a validity map (mask) is specified
3672+ with pytest .raises (pa .lib .ArrowInvalid ):
3673+ list_array_type .from_arrays (
3674+ array_slice .offsets , array_slice .sizes ,
3675+ array_slice .values , mask = array_slice .is_null ())
3676+
3677+
3678+ @pytest .mark .parametrize (('list_array_type' , 'list_type_factory' , 'offset_type' ),
3679+ [(pa .ListViewArray , pa .list_view , pa .int32 ()),
3680+ (pa .LargeListViewArray , pa .large_list_view , pa .int64 ())])
3681+ def test_list_view_flatten (list_array_type , list_type_factory , offset_type ):
3682+ arr0 = pa .array ([
3683+ 1 , None , 2 ,
3684+ 3 , 4 ,
3685+ 5 , 6 ,
3686+ 7 , 8
3687+ ], type = pa .int64 ())
3688+
3689+ typ1 = list_type_factory (pa .int64 ())
3690+ arr1 = pa .array ([
3691+ [1 , None , 2 ],
3692+ None ,
3693+ [3 , 4 ],
3694+ [],
3695+ [5 , 6 ],
3696+ None ,
3697+ [7 , 8 ]
3698+ ], type = typ1 )
3699+ offsets1 = pa .array ([0 , 3 , 3 , 5 , 5 , 7 , 7 ], type = offset_type )
3700+ sizes1 = pa .array ([3 , 0 , 2 , 0 , 2 , 0 , 2 ], type = offset_type )
3701+
3702+ typ2 = list_type_factory (
3703+ list_type_factory (
3704+ pa .int64 ()
3705+ )
3706+ )
3707+ arr2 = pa .array ([
3708+ None ,
3709+ [
3710+ [1 , None , 2 ],
3711+ None ,
3712+ [3 , 4 ]
3713+ ],
3714+ [],
3715+ [
3716+ [],
3717+ [5 , 6 ],
3718+ None
3719+ ],
3720+ [
3721+ [7 , 8 ]
3722+ ]
3723+ ], type = typ2 )
3724+ offsets2 = pa .array ([0 , 0 , 3 , 3 , 6 ], type = offset_type )
3725+ sizes2 = pa .array ([0 , 3 , 0 , 3 , 1 ], type = offset_type )
3726+
3727+ assert arr1 .flatten ().equals (arr0 )
3728+ assert arr1 .offsets .equals (offsets1 )
3729+ assert arr1 .sizes .equals (sizes1 )
3730+ assert arr1 .values .equals (arr0 )
3731+ assert arr2 .flatten ().equals (arr1 )
3732+ assert arr2 .offsets .equals (offsets2 )
3733+ assert arr2 .sizes .equals (sizes2 )
3734+ assert arr2 .values .equals (arr1 )
3735+ assert arr2 .flatten ().flatten ().equals (arr0 )
3736+ assert arr2 .values .values .equals (arr0 )
3737+
3738+ # test out of order offsets
36413739 values = [1 , 2 , 3 , 4 ]
36423740 offsets = [3 , 2 , 1 , 0 ]
36433741 sizes = [1 , 1 , 1 , 1 ]
36443742 array = list_array_type .from_arrays (offsets , sizes , values )
36453743
36463744 assert array .flatten ().to_pylist () == [4 , 3 , 2 , 1 ]
3745+
3746+ # test null elements backed by non-empty sublists
3747+ mask = pa .array ([False , False , False , True ])
3748+ array = list_array_type .from_arrays (offsets , sizes , values , mask = mask )
3749+
3750+ assert array .flatten ().to_pylist () == [4 , 3 , 2 ]
3751+ assert array .values .to_pylist () == [1 , 2 , 3 , 4 ]
3752+
3753+
3754+ @pytest .mark .parametrize ('list_view_type' , [pa .ListViewArray , pa .LargeListViewArray ])
3755+ def test_list_view_slice (list_view_type ):
3756+ # After slicing, .values still refers to the full values buffer, while
3757+ # .offsets is sliced too, so the offsets keep pointing into the full values array.
3758+ # After slicing, .flatten() returns only the values covered by the slice.
3759+
3760+ array = list_view_type .from_arrays (offsets = [0 , 3 , 4 ], sizes = [
3761+ 3 , 1 , 2 ], values = [1 , 2 , 3 , 4 , 5 , 6 ])
3762+ sliced_array = array [1 :]
3763+
3764+ assert sliced_array .values .to_pylist () == [1 , 2 , 3 , 4 , 5 , 6 ]
3765+ assert sliced_array .offsets .to_pylist () == [3 , 4 ]
3766+ assert sliced_array .flatten ().to_pylist () == [4 , 5 , 6 ]
3767+
3768+ i = sliced_array .offsets [0 ].as_py ()
3769+ j = sliced_array .offsets [1 ].as_py ()
3770+
3771+ assert sliced_array [0 ].as_py () == sliced_array .values [i :j ].to_pylist () == [4 ]
0 commit comments