Skip to content

Error when using engine='scipy' reading CM2.6 ocean output #1704

@jbusecke

Description

@jbusecke

Code Sample, a copy-pastable example if possible

path = '/work/Julius.Busecke/CM2.6_staged/CM2.6_A_V03_1PctTo2X/annual_averages'
ds_ocean = xr.open_mfdataset(os.path.join(path,'ocean.*.ann.nc'), chunks={'time':1}, 
                             decode_times=False, engine='scipy')
ds_ocean

gives

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-7-278556ff881c> in <module>()
      1 path = '/work/Julius.Busecke/CM2.6_staged/CM2.6_A_V03_1PctTo2X/annual_averages'
----> 2 ds_ocean = xr.open_mfdataset(os.path.join(path,'ocean.*.ann.nc'), chunks={'time':1}, decode_times=False, engine='scipy')
      3 ds_ocean

~/code/miniconda/envs/standard/lib/python3.6/site-packages/xarray/backends/api.py in open_mfdataset(paths, chunks, concat_dim, compat, preprocess, engine, lock, **kwargs)
    503         lock = _default_lock(paths[0], engine)
    504     datasets = [open_dataset(p, engine=engine, chunks=chunks or {}, lock=lock,
--> 505                              **kwargs) for p in paths]
    506     file_objs = [ds._file_obj for ds in datasets]
    507 

~/code/miniconda/envs/standard/lib/python3.6/site-packages/xarray/backends/api.py in <listcomp>(.0)
    503         lock = _default_lock(paths[0], engine)
    504     datasets = [open_dataset(p, engine=engine, chunks=chunks or {}, lock=lock,
--> 505                              **kwargs) for p in paths]
    506     file_objs = [ds._file_obj for ds in datasets]
    507 

~/code/miniconda/envs/standard/lib/python3.6/site-packages/xarray/backends/api.py in open_dataset(filename_or_obj, group, decode_cf, mask_and_scale, decode_times, autoclose, concat_characters, decode_coords, engine, chunks, lock, cache, drop_variables)
    283         elif engine == 'scipy':
    284             store = backends.ScipyDataStore(filename_or_obj,
--> 285                                             autoclose=autoclose)
    286         elif engine == 'pydap':
    287             store = backends.PydapDataStore(filename_or_obj)

~/code/miniconda/envs/standard/lib/python3.6/site-packages/xarray/backends/scipy_.py in __init__(self, filename_or_obj, mode, format, group, writer, mmap, autoclose)
    133                                    filename=filename_or_obj,
    134                                    mode=mode, mmap=mmap, version=version)
--> 135         self.ds = opener()
    136         self._autoclose = autoclose
    137         self._isopen = True

~/code/miniconda/envs/standard/lib/python3.6/site-packages/xarray/backends/scipy_.py in _open_scipy_netcdf(filename, mode, mmap, version)
     81     try:
     82         return scipy.io.netcdf_file(filename, mode=mode, mmap=mmap,
---> 83                                     version=version)
     84     except TypeError as e:  # netcdf3 message is obscure in this case
     85         errmsg = e.args[0]

~/code/miniconda/envs/standard/lib/python3.6/site-packages/scipy/io/netcdf.py in __init__(self, filename, mode, mmap, version, maskandscale)
    264 
    265         if mode in 'ra':
--> 266             self._read()
    267 
    268     def __setattr__(self, attr, value):

~/code/miniconda/envs/standard/lib/python3.6/site-packages/scipy/io/netcdf.py in _read(self)
    591         self._read_dim_array()
    592         self._read_gatt_array()
--> 593         self._read_var_array()
    594 
    595     def _read_numrecs(self):

~/code/miniconda/envs/standard/lib/python3.6/site-packages/scipy/io/netcdf.py in _read_var_array(self)
    696             # Build rec array.
    697             if self.use_mmap:
--> 698                 rec_array = self._mm_buf[begin:begin+self._recs*self._recsize].view(dtype=dtypes)
    699                 rec_array.shape = (self._recs,)
    700             else:

ValueError: new type not compatible with array.

xarray version: '0.9.6'

Problem description

I am trying to lazily read in a large number of high-resolution ocean model output files. If I omit engine='scipy', it works but takes forever.
Is there a known reason why this would fail with the 'scipy' option?

I found #1313, and checked my conda environment:

$ conda list hdf
# packages in environment at /home/Julius.Busecke/code/miniconda/envs/standard:
#
hdf4                      4.2.12                        0    conda-forge
hdf5                      1.8.18                        1    conda-forge
$ conda list netcdf
# packages in environment at /home/Julius.Busecke/code/miniconda/envs/standard:
#
h5netcdf                  0.4.2                      py_0    conda-forge
libnetcdf                 4.4.1.1                       6    conda-forge
netcdf4                   1.3.0                    py36_0    conda-forge

I can import netCDF4 and can also load a single file using netCDF, so I am unsure whether this is the same error as in #1313.

I keep getting this error with some of the files for this particular model, but not with others.

Any help would be greatly appreciated.

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions