-
-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Did you mean to make pyarrow a hard dependency of dask-array #12072
Copy link
Copy link
Closed
Labels
bug: Something is broken
Description
I found this because I depend on dask + xarray and only use a very restricted subset of dask.
mamba create --name d "dask-core>=2025.9.0" python=3.12 xarray --channel conda-forge --override-channels
with the script
import numpy as np
import xarray as xr
from dask import array as da
import dask
x = xr.DataArray(
data=da.zeros((1)),
dims=('x',),
name='foo'
).to_dataset()
x['foo'] = x.foo.dims, x.foo.data + 1
dask.optimize(x)
y = xr.DataArray(
data=da.zeros((1)),
dims=('y',),
coords={'y': np.arange(1)},
name='foo'
).to_dataset()
y['foo'] = y.foo.dims, y.foo.data + 1
dask.optimize(y)
Details
>>> >>> >>> >>> ... ... ... ... >>> >>> (<xarray.Dataset> Size: 8B
Dimensions: (x: 1)
Dimensions without coordinates: x
Data variables:
foo (x) float64 8B dask.array<chunksize=(1,), meta=np.ndarray>,)
>>> >>> ... ... ... ... ... >>> >>> Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/base.py", line 583, in optimize
collections, repack = unpack_collections(*args, traverse=traverse)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/base.py", line 521, in unpack_collections
repack_dsk[out] = Task(out, tuple, List(*[_unpack(i) for i in args]))
^^^^^^^^^^
File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/base.py", line 485, in _unpack
tok = tokenize(expr)
^^^^^^^^^^^^^^
File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/tokenize.py", line 76, in tokenize
return _tokenize(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/tokenize.py", line 34, in _tokenize
token: object = _normalize_seq_func(args)
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/tokenize.py", line 154, in _normalize_seq_func
return tuple(map(_inner_normalize_token, seq))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/tokenize.py", line 147, in _inner_normalize_token
return normalize_token(item)
^^^^^^^^^^^^^^^^^^^^^
File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/utils.py", line 781, in __call__
return meth(arg, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/tokenize.py", line 198, in normalize_object
return method()
^^^^^^^^
File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/xarray/core/dataset.py", line 628, in __dask_tokenize__
return normalize_token(
^^^^^^^^^^^^^^^^
File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/utils.py", line 781, in __call__
return meth(arg, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/tokenize.py", line 161, in normalize_seq
return type(seq).__name__, _normalize_seq_func(seq)
^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/tokenize.py", line 154, in _normalize_seq_func
return tuple(map(_inner_normalize_token, seq))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/tokenize.py", line 147, in _inner_normalize_token
return normalize_token(item)
^^^^^^^^^^^^^^^^^^^^^
File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/utils.py", line 781, in __call__
return meth(arg, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/tokenize.py", line 122, in normalize_dict
return "dict", _normalize_seq_func(
^^^^^^^^^^^^^^^^^^^^
File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/tokenize.py", line 154, in _normalize_seq_func
return tuple(map(_inner_normalize_token, seq))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/tokenize.py", line 147, in _inner_normalize_token
return normalize_token(item)
^^^^^^^^^^^^^^^^^^^^^
File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/utils.py", line 781, in __call__
return meth(arg, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/tokenize.py", line 161, in normalize_seq
return type(seq).__name__, _normalize_seq_func(seq)
^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/tokenize.py", line 154, in _normalize_seq_func
return tuple(map(_inner_normalize_token, seq))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/tokenize.py", line 147, in _inner_normalize_token
return normalize_token(item)
^^^^^^^^^^^^^^^^^^^^^
File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/utils.py", line 781, in __call__
return meth(arg, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/tokenize.py", line 198, in normalize_object
return method()
^^^^^^^^
File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/xarray/core/variable.py", line 2755, in __dask_tokenize__
return normalize_token(
^^^^^^^^^^^^^^^^
File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/utils.py", line 781, in __call__
return meth(arg, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/tokenize.py", line 161, in normalize_seq
return type(seq).__name__, _normalize_seq_func(seq)
^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/tokenize.py", line 154, in _normalize_seq_func
return tuple(map(_inner_normalize_token, seq))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/tokenize.py", line 147, in _inner_normalize_token
return normalize_token(item)
^^^^^^^^^^^^^^^^^^^^^
File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/utils.py", line 780, in __call__
meth = self.dispatch(type(arg))
^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/utils.py", line 759, in dispatch
register()
File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/tokenize.py", line 281, in register_pandas
from dask.dataframe._compat import PANDAS_GE_210
File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/dataframe/__init__.py", line 24, in <module>
from dask.dataframe import backends, dispatch
File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/dataframe/backends.py", line 8, in <module>
import pyarrow as pa
ModuleNotFoundError: No module named 'pyarrow'
Anything else we need to know?: I decided to work around this by removing dask.optimize from our stack, but it is a little annoying.
I mostly think that pyarrow is too heavy to be a hard dependency for a "backend".
Environment:
- Dask version: 2025.9.0
- Python version: 3.12
- Operating System: Linux
- Install method (conda, pip, source): conda + conda-forge
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
bug: Something is broken