Skip to content

Trouble applying argmin when using xr.open_mfdataset() #759

@spencerkclark

Description

@spencerkclark

I recently tried to apply the `argmin` function to a dataset that I opened using `xr.open_mfdataset` and encountered an unexpected error. Applying `argmin` to the same dataset opened using `xr.open_dataset` works fine. Below is an example with some toy data. Could this be a bug, or is there something I'm doing wrong? I appreciate your help.

In [1]: import xarray as xr
In [2]: import numpy as np
In [3]: xr.DataArray(np.random.rand(2, 3, 4), coords=[np.arange(2), np.arange(3), np.arange(4)], dims=['x', 'y', 'z']).to_dataset(name='test').to_netcdf('test_mfdataset.nc')
In [4]: xr.open_dataset('test_mfdataset.nc').test.argmin('x').values
Out[4]:
array([[1, 1, 1, 1],
       [1, 0, 1, 0],
       [1, 1, 0, 1]])

In [5]: xr.open_mfdataset('test_mfdataset.nc').test.argmin('x').values
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-8-ccac9ca40874> in <module>()
----> 1 xr.open_mfdataset('test_mfdataset.nc').test.argmin('x').values

//anaconda/lib/python2.7/site-packages/xarray/core/dataarray.py in values(self)
    353     def values(self):
    354         """The array's data as a numpy.ndarray"""
--> 355         return self.variable.values
    356
    357     @values.setter

//anaconda/lib/python2.7/site-packages/xarray/core/variable.py in values(self)
    286     def values(self):
    287         """The variable's data as a numpy.ndarray"""
--> 288         return _as_array_or_item(self._data_cached())
    289
    290     @values.setter

//anaconda/lib/python2.7/site-packages/xarray/core/variable.py in _data_cached(self)
    252     def _data_cached(self):
    253         if not isinstance(self._data, (np.ndarray, PandasIndexAdapter)):
--> 254             self._data = np.asarray(self._data)
    255         return self._data
    256

//anaconda/lib/python2.7/site-packages/numpy/core/numeric.pyc in asarray(a, dtype, order)
    472
    473     """
--> 474     return array(a, dtype, copy=False, order=order)
    475
    476 def asanyarray(a, dtype=None, order=None):

//anaconda/lib/python2.7/site-packages/dask/array/core.py in __array__(self, dtype, **kwargs)
    852
    853     def __array__(self, dtype=None, **kwargs):
--> 854         x = self.compute()
    855         if dtype and x.dtype != dtype:
    856             x = x.astype(dtype)

//anaconda/lib/python2.7/site-packages/dask/base.py in compute(self, **kwargs)
     35
     36     def compute(self, **kwargs):
---> 37         return compute(self, **kwargs)[0]
     38
     39     @classmethod

//anaconda/lib/python2.7/site-packages/dask/base.py in compute(*args, **kwargs)
    108                 for opt, val in groups.items()])
    109     keys = [var._keys() for var in variables]
--> 110     results = get(dsk, keys, **kwargs)
    111
    112     results_iter = iter(results)

//anaconda/lib/python2.7/site-packages/dask/threaded.py in get(dsk, result, cache, num_workers, **kwargs)
     55     results = get_async(pool.apply_async, len(pool._pool), dsk, result,
     56                         cache=cache, queue=queue, get_id=_thread_get_id,
---> 57                         **kwargs)
     58
     59     return results

//anaconda/lib/python2.7/site-packages/dask/async.py in get_async(apply_async, num_workers, dsk, result, cache, queue, get_id, raise_on_exception, rerun_exceptions_locally, callbacks, **kwargs)
    479                 _execute_task(task, data)  # Re-execute locally
    480             else:
--> 481                 raise(remote_exception(res, tb))
    482         state['cache'][key] = res
    483         finish_task(dsk, key, state, results, keyorder.get)

IndexError: shape mismatch: indexing arrays could not be broadcast together with shapes (3,4) (4,1) (1,3)

Traceback
---------
  File "//anaconda/lib/python2.7/site-packages/dask/async.py", line 264, in execute_task
    result = _execute_task(task, data)
  File "//anaconda/lib/python2.7/site-packages/dask/async.py", line 246, in _execute_task
    return func(*args2)
  File "//anaconda/lib/python2.7/site-packages/toolz/functoolz.py", line 381, in __call__
    ret = f(ret)
  File "//anaconda/lib/python2.7/site-packages/dask/array/reductions.py", line 450, in arg_agg
    return _arg_combine(data, axis, argfunc)[0]
  File "//anaconda/lib/python2.7/site-packages/dask/array/reductions.py", line 416, in _arg_combine
    arg = (arg + offsets)[tuple(inds)]

Metadata

Assignees

No one assigned

    Labels

    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions