@@ -29,6 +29,7 @@ import pyarrow as pa
2929from pyarrow.lib cimport *
3030from pyarrow.lib import ArrowTypeError, frombytes, tobytes, _pc
3131from pyarrow.includes.libarrow_dataset cimport *
32+ from pyarrow._acero cimport ExecNodeOptions
3233from pyarrow._compute cimport Expression, _bind
3334from pyarrow._compute import _forbid_instantiation
3435from pyarrow._fs cimport FileSystem, FileSelector
@@ -37,14 +38,19 @@ from pyarrow._csv cimport (
3738from pyarrow.util import _is_iterable, _is_path_like, _stringify_path
3839
3940
40- _orc_fileformat = None
41- _orc_imported = False
42-
4341_DEFAULT_BATCH_SIZE = 2 ** 17
4442_DEFAULT_BATCH_READAHEAD = 16
4543_DEFAULT_FRAGMENT_READAHEAD = 4
4644
4745
46+ # Initialise support for Datasets in ExecPlan
47+ Initialize()
48+
49+
50+ _orc_fileformat = None
51+ _orc_imported = False
52+
53+
4854def _get_orc_fileformat ():
4955 """
5056 Import OrcFileFormat on first usage (to avoid circular import issue
@@ -3634,3 +3640,45 @@ def _filesystemdataset_write(
36343640 c_scanner = data.unwrap()
36353641 with nogil:
36363642 check_status(CFileSystemDataset.Write(c_options, c_scanner))
3643+
3644+
cdef class _ScanNodeOptions(ExecNodeOptions):
    """
    Cython-level base class for ScanNodeOptions.

    Builds the C++ ``CScanNodeOptions`` object and stores it in the
    ``wrapped`` pointer inherited from ExecNodeOptions.
    """

    def _set_options(self, Dataset dataset not None, dict scan_options):
        """
        Create the wrapped C++ scan node options for the given dataset.

        Parameters
        ----------
        dataset : Dataset
            The dataset to scan. Must not be None.
        scan_options : dict
            Scan options, forwarded to ``Scanner._make_scan_options``.

        Raises
        ------
        TypeError
            If ``dataset`` is None or not a Dataset instance.
        """
        cdef:
            shared_ptr[CScanOptions] c_scan_options

        # ``not None`` in the signature matters: ``dataset.unwrap()`` is a
        # C-level method call that is not None-checked, so a None argument
        # must be rejected before reaching this point.
        c_scan_options = Scanner._make_scan_options(dataset, scan_options)

        self.wrapped.reset(
            new CScanNodeOptions(dataset.unwrap(), c_scan_options)
        )
3656+
3657+
class ScanNodeOptions(_ScanNodeOptions):
    """
    A Source node which yields batches from a Dataset scan.

    This is the option class for the "scan" node factory.

    This node is capable of applying pushdown projections or filters
    to the file readers which reduce the amount of data that needs to
    be read (if supported by the file format). But note that this does not
    construct associated filter or project nodes to perform the final
    filtering or projection. Rather, you may supply the same filter
    expression or projection to the scan node that you also supply
    to the filter or project node.

    Yielded batches will be augmented with fragment/batch indices to
    enable stable ordering for simple ExecPlans.

    Parameters
    ----------
    dataset : pyarrow.dataset.Dataset
        The dataset which acts as the data source. Must not be None.
    **kwargs : dict, optional
        Scan options. See `Scanner.from_dataset` for possible arguments.
    """

    # ``not None`` rejects a None dataset with a TypeError at the Python
    # boundary instead of letting it reach the C-level ``unwrap()`` call.
    def __init__(self, Dataset dataset not None, **kwargs):
        self._set_options(dataset, kwargs)
0 commit comments