Skip to content

Commit 84e5430

Browse files
GH-34686: [Python] Add RunEndEncodedScalar class (#34924)
### Rationale for this change

Follow-up on #34570 (exposing the new RunEndEncoded array and type in pyarrow) to also add the scalar class.

### Are there any user-facing changes?

The example from the issue now works:

```
In [15]: run_ends = [3, 5, 10, 19]
    ...: values = [1, 2, 1, 3]
    ...: ree_array = pa.RunEndEncodedArray.from_arrays(run_ends, values)

In [16]: ree_array[0]
Out[16]: <pyarrow.RunEndEncodedScalar: 1>
```

* Closes: #34686

Authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Signed-off-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
1 parent e2afb8c commit 84e5430

6 files changed

Lines changed: 47 additions & 1 deletion

File tree

docs/source/python/api/arrays.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ classes may expose data type-specific methods or properties.
123123
MonthDayNanoIntervalScalar
124124
Decimal128Scalar
125125
DictionaryScalar
126+
RunEndEncodedScalar
126127
ListScalar
127128
LargeListScalar
128129
MapScalar

python/pyarrow/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ def print_entry(label, value):
225225
StringScalar, LargeStringScalar,
226226
FixedSizeBinaryScalar, DictionaryScalar,
227227
MapScalar, StructScalar, UnionScalar,
228-
ExtensionScalar)
228+
RunEndEncodedScalar, ExtensionScalar)
229229

230230
# Buffers, allocation
231231
from pyarrow.lib import (Buffer, ResizableBuffer, foreign_buffer, py_buffer,

python/pyarrow/includes/libarrow.pxd

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1155,6 +1155,9 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
11551155
vector[shared_ptr[CScalar]] value
11561156
int child_id
11571157

1158+
cdef cppclass CRunEndEncodedScalar" arrow::RunEndEncodedScalar"(CScalar):
1159+
shared_ptr[CScalar] value
1160+
11581161
cdef cppclass CExtensionScalar" arrow::ExtensionScalar"(CScalar):
11591162
CExtensionScalar(shared_ptr[CScalar] storage,
11601163
shared_ptr[CDataType], c_bool is_valid)

python/pyarrow/scalar.pxi

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -880,6 +880,25 @@ cdef class DictionaryScalar(Scalar):
880880
return self.value.as_py() if self.is_valid else None
881881

882882

883+
cdef class RunEndEncodedScalar(Scalar):
884+
"""
885+
Concrete class for RunEndEncoded scalars.
886+
"""
887+
@property
888+
def value(self):
889+
"""
890+
Return underlying value as a scalar.
891+
"""
892+
cdef CRunEndEncodedScalar* sp = <CRunEndEncodedScalar*> self.wrapped.get()
893+
return Scalar.wrap(sp.value)
894+
895+
def as_py(self):
896+
"""
897+
Return underlying value as a Python object.
898+
"""
899+
return self.value.as_py()
900+
901+
883902
cdef class UnionScalar(Scalar):
884903
"""
885904
Concrete class for Union scalars.
@@ -1010,6 +1029,7 @@ cdef dict _scalar_classes = {
10101029
_Type_STRUCT: StructScalar,
10111030
_Type_MAP: MapScalar,
10121031
_Type_DICTIONARY: DictionaryScalar,
1032+
_Type_RUN_END_ENCODED: RunEndEncodedScalar,
10131033
_Type_SPARSE_UNION: UnionScalar,
10141034
_Type_DENSE_UNION: UnionScalar,
10151035
_Type_INTERVAL_MONTH_DAY_NANO: MonthDayNanoIntervalScalar,

python/pyarrow/tests/test_misc.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ def test_runtime_info():
203203
pa.UnionScalar,
204204
pa.StructScalar,
205205
pa.DictionaryScalar,
206+
pa.RunEndEncodedScalar,
206207
pa.ipc.Message,
207208
pa.ipc.MessageReader,
208209
pa.MemoryPool,

python/pyarrow/tests/test_scalars.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -681,6 +681,27 @@ def test_dictionary():
681681
assert restored.equals(s)
682682

683683

684+
def test_run_end_encoded():
685+
run_ends = [3, 5, 10, 12, 19]
686+
values = [1, 2, 1, None, 3]
687+
arr = pa.RunEndEncodedArray.from_arrays(run_ends, values)
688+
689+
scalar = arr[0]
690+
assert isinstance(scalar, pa.RunEndEncodedScalar)
691+
assert isinstance(scalar.value, pa.Int64Scalar)
692+
assert scalar.value == pa.array(values)[0]
693+
assert scalar.as_py() == 1
694+
695+
# null -> .value is still a scalar, as_py returns None
696+
scalar = arr[10]
697+
assert isinstance(scalar.value, pa.Int64Scalar)
698+
assert scalar.as_py() is None
699+
700+
# constructing a scalar directly doesn't work yet
701+
with pytest.raises(NotImplementedError):
702+
pa.scalar(1, pa.run_end_encoded(pa.int64(), pa.int64()))
703+
704+
684705
def test_union():
685706
# sparse
686707
arr = pa.UnionArray.from_sparse(

0 commit comments

Comments
 (0)