Skip to content

Did you mean to make pyarrow a hard dependency of dask-array? #12072

@hmaarrfk

Description

@hmaarrfk

I found this because I depend on dask + xarray and only use a very restricted subset of dask.

mamba create --name d "dask-core>=2025.9.0" python=3.12 xarray --channel conda-forge --override-channels

and then ran the following script in that environment:

import numpy as np
import xarray as xr
from dask import array as da
import dask
x = xr.DataArray(
    data=da.zeros((1)),
    dims=('x',),
    name='foo'
).to_dataset()
x['foo'] = x.foo.dims, x.foo.data + 1
dask.optimize(x)

y = xr.DataArray(
    data=da.zeros((1)),
    dims=('y',),
    coords={'y': np.arange(1)},
    name='foo'
).to_dataset()
y['foo'] = y.foo.dims, y.foo.data + 1
dask.optimize(y)
Details
>>> >>> >>> >>> ... ... ... ... >>> >>> (<xarray.Dataset> Size: 8B
Dimensions:  (x: 1)
Dimensions without coordinates: x
Data variables:
    foo      (x) float64 8B dask.array<chunksize=(1,), meta=np.ndarray>,)
>>> >>> ... ... ... ... ... >>> >>> Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/base.py", line 583, in optimize
    collections, repack = unpack_collections(*args, traverse=traverse)
                          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/base.py", line 521, in unpack_collections
    repack_dsk[out] = Task(out, tuple, List(*[_unpack(i) for i in args]))
                                              ^^^^^^^^^^
  File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/base.py", line 485, in _unpack
    tok = tokenize(expr)
          ^^^^^^^^^^^^^^
  File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/tokenize.py", line 76, in tokenize
    return _tokenize(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/tokenize.py", line 34, in _tokenize
    token: object = _normalize_seq_func(args)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/tokenize.py", line 154, in _normalize_seq_func
    return tuple(map(_inner_normalize_token, seq))
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/tokenize.py", line 147, in _inner_normalize_token
    return normalize_token(item)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/utils.py", line 781, in __call__
    return meth(arg, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/tokenize.py", line 198, in normalize_object
    return method()
           ^^^^^^^^
  File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/xarray/core/dataset.py", line 628, in __dask_tokenize__
    return normalize_token(
           ^^^^^^^^^^^^^^^^
  File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/utils.py", line 781, in __call__
    return meth(arg, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/tokenize.py", line 161, in normalize_seq
    return type(seq).__name__, _normalize_seq_func(seq)
                               ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/tokenize.py", line 154, in _normalize_seq_func
    return tuple(map(_inner_normalize_token, seq))
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/tokenize.py", line 147, in _inner_normalize_token
    return normalize_token(item)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/utils.py", line 781, in __call__
    return meth(arg, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/tokenize.py", line 122, in normalize_dict
    return "dict", _normalize_seq_func(
                   ^^^^^^^^^^^^^^^^^^^^
  File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/tokenize.py", line 154, in _normalize_seq_func
    return tuple(map(_inner_normalize_token, seq))
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/tokenize.py", line 147, in _inner_normalize_token
    return normalize_token(item)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/utils.py", line 781, in __call__
    return meth(arg, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/tokenize.py", line 161, in normalize_seq
    return type(seq).__name__, _normalize_seq_func(seq)
                               ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/tokenize.py", line 154, in _normalize_seq_func
    return tuple(map(_inner_normalize_token, seq))
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/tokenize.py", line 147, in _inner_normalize_token
    return normalize_token(item)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/utils.py", line 781, in __call__
    return meth(arg, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/tokenize.py", line 198, in normalize_object
    return method()
           ^^^^^^^^
  File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/xarray/core/variable.py", line 2755, in __dask_tokenize__
    return normalize_token(
           ^^^^^^^^^^^^^^^^
  File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/utils.py", line 781, in __call__
    return meth(arg, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/tokenize.py", line 161, in normalize_seq
    return type(seq).__name__, _normalize_seq_func(seq)
                               ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/tokenize.py", line 154, in _normalize_seq_func
    return tuple(map(_inner_normalize_token, seq))
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/tokenize.py", line 147, in _inner_normalize_token
    return normalize_token(item)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/utils.py", line 780, in __call__
    meth = self.dispatch(type(arg))
           ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/utils.py", line 759, in dispatch
    register()
  File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/tokenize.py", line 281, in register_pandas
    from dask.dataframe._compat import PANDAS_GE_210
  File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/dataframe/__init__.py", line 24, in <module>
    from dask.dataframe import backends, dispatch
  File "/home/mark/miniforge3/envs/d/lib/python3.12/site-packages/dask/dataframe/backends.py", line 8, in <module>
    import pyarrow as pa
ModuleNotFoundError: No module named 'pyarrow'

Anything else we need to know?: I decided to work around this by removing dask.optimize from our stack, but it is a little annoying.

Mostly, I think pyarrow is too heavy to be a hard dependency just for a "backend".

Environment:

  • Dask version: 2025.9.0
  • Python version: 3.12
  • Operating System: Linux
  • Install method (conda, pip, source): conda + conda-forge

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug: Something is broken

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions