Skip to content

Commit 60b89ff

Browse files
GH-33500: [Python] add Table.to/from_struct_array (#38520)
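### Rationale for this change

`RecordBatch` can already be converted to and from a `StructArray`; this change adds the equivalent conversions to `Table`.

### What changes are included in this PR?

Adds `Table.from_struct_array` (accepting a `StructArray` or a `ChunkedArray` of struct type) and `Table.to_struct_array` (with an optional `max_chunksize`) in `python/pyarrow/table.pxi`.

### Are these changes tested?

Yes, new tests are added in `python/pyarrow/tests/test_table.py`.

### Are there any user-facing changes?

Yes, two new public methods on `pyarrow.Table`: `from_struct_array` and `to_struct_array`.

* Closes: #33500

Lead-authored-by: Judah Rand <17158624+judahrand@users.noreply.github.com>
Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Signed-off-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>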
1 parent dc40e5f commit 60b89ff

2 files changed

Lines changed: 129 additions & 0 deletions

File tree

python/pyarrow/table.pxi

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3991,6 +3991,60 @@ cdef class Table(_Tabular):
39913991
result.validate()
39923992
return result
39933993

3994+
@staticmethod
def from_struct_array(struct_array):
    """
    Build a Table whose columns are the fields of a struct array.

    Every chunk of the input is converted with
    ``RecordBatch.from_struct_array`` and the resulting batches are
    assembled with ``Table.from_batches``.

    Parameters
    ----------
    struct_array : StructArray or ChunkedArray
        Array to construct the table from.

    Returns
    -------
    pyarrow.Table

    Examples
    --------
    >>> import pyarrow as pa
    >>> struct = pa.array([{'n_legs': 2, 'animals': 'Parrot'},
    ...                    {'year': 2022, 'n_legs': 4}])
    >>> pa.Table.from_struct_array(struct).to_pandas()
      animals  n_legs    year
    0  Parrot       2     NaN
    1    None       4  2022.0
    """
    # A plain Array is handled as a single chunk; any other input is
    # expected to expose its pieces through ``.chunks``.
    if isinstance(struct_array, Array):
        pieces = [struct_array]
    else:
        pieces = struct_array.chunks

    batches = [RecordBatch.from_struct_array(piece) for piece in pieces]
    return Table.from_batches(batches)
4028+
4029+
def to_struct_array(self, max_chunksize=None):
    """
    Convert to a chunked array of struct type.

    Each record batch returned by ``to_batches`` is converted with
    ``RecordBatch.to_struct_array`` and becomes one chunk of the result.
    The struct type is derived from the table schema, so the result is
    well-typed even when the table produces no batches.

    Parameters
    ----------
    max_chunksize : int, default None
        Maximum size for ChunkedArray chunks. Individual chunks may be
        smaller depending on the chunk layout of individual columns.

    Returns
    -------
    ChunkedArray
    """
    struct_chunks = [
        batch.to_struct_array()
        for batch in self.to_batches(max_chunksize=max_chunksize)
    ]
    # Pass the type explicitly: with an empty list of chunks there is
    # nothing for chunked_array() to infer the type from.
    return chunked_array(struct_chunks, type=struct(self.schema))
4047+
39944048
@staticmethod
39954049
def from_batches(batches, Schema schema=None):
39964050
"""

python/pyarrow/tests/test_table.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -878,6 +878,81 @@ def test_recordbatch_from_struct_array():
878878
))
879879

880880

881+
def test_recordbatch_to_struct_array():
    """RecordBatch.to_struct_array yields one struct field per column."""
    columns = [
        pa.array([1, None], type=pa.int32()),
        pa.array([None, 1.0], type=pa.float32()),
    ]
    batch = pa.RecordBatch.from_arrays(columns, ["ints", "floats"])

    struct_type = pa.struct([("ints", pa.int32()), ("floats", pa.float32())])
    expected = pa.array([{"ints": 1}, {"floats": 1.0}], type=struct_type)

    assert batch.to_struct_array().equals(expected)
893+
894+
895+
def test_table_from_struct_array_invalid():
    """Passing a non-struct array must raise a TypeError."""
    not_a_struct = pa.array(range(5))
    expected_message = "Argument 'struct_array' has incorrect type"
    with pytest.raises(TypeError, match=expected_message):
        pa.Table.from_struct_array(not_a_struct)
898+
899+
900+
def test_table_from_struct_array():
    """A StructArray is unpacked into one table column per struct field."""
    struct_type = pa.struct([("ints", pa.int32()), ("floats", pa.float32())])
    source = pa.array([{"ints": 1}, {"floats": 1.0}], type=struct_type)

    expected_columns = [
        pa.array([1, None], type=pa.int32()),
        pa.array([None, 1.0], type=pa.float32()),
    ]
    expected = pa.Table.from_arrays(expected_columns, ["ints", "floats"])

    assert pa.Table.from_struct_array(source).equals(expected)
912+
913+
914+
def test_table_from_struct_array_chunked_array():
    """A ChunkedArray of struct type is accepted as well as a StructArray."""
    struct_type = pa.struct([("ints", pa.int32()), ("floats", pa.float32())])
    source = pa.chunked_array(
        [[{"ints": 1}, {"floats": 1.0}]],
        type=struct_type,
    )

    expected_columns = [
        pa.array([1, None], type=pa.int32()),
        pa.array([None, 1.0], type=pa.float32()),
    ]
    expected = pa.Table.from_arrays(expected_columns, ["ints", "floats"])

    assert pa.Table.from_struct_array(source).equals(expected)
926+
927+
928+
def test_table_to_struct_array():
    """Table.to_struct_array returns a struct-typed ChunkedArray."""
    columns = [
        pa.array([1, None], type=pa.int32()),
        pa.array([None, 1.0], type=pa.float32()),
    ]
    table = pa.Table.from_arrays(columns, ["ints", "floats"])

    struct_type = pa.struct([("ints", pa.int32()), ("floats", pa.float32())])
    expected = pa.chunked_array(
        [[{"ints": 1}, {"floats": 1.0}]],
        type=struct_type,
    )

    assert table.to_struct_array().equals(expected)
940+
941+
942+
def test_table_to_struct_array_with_max_chunksize():
    """With max_chunksize=1 each row ends up in its own chunk."""
    columns = [
        pa.array([1, None], type=pa.int32()),
        pa.array([None, 1.0], type=pa.float32()),
    ]
    table = pa.Table.from_arrays(columns, ["ints", "floats"])

    struct_type = pa.struct([("ints", pa.int32()), ("floats", pa.float32())])
    expected = pa.chunked_array(
        [[{"ints": 1}], [{"floats": 1.0}]],
        type=struct_type,
    )

    assert table.to_struct_array(max_chunksize=1).equals(expected)
954+
955+
881956
def _table_like_slice_tests(factory):
882957
data = [
883958
pa.array(range(5)),

0 commit comments

Comments
 (0)