Skip to content

Error writing virtual dataset to icechunk #421

@mpiannucci

Description

@mpiannucci

While updating the icechunk docs, I hit the error below when writing a virtual dataset to an icechunk store. Reproduction script:

from datetime import datetime, timezone
import fsspec
import icechunk
import xarray as xr
from virtualizarr import open_virtual_dataset


# List the OISST NetCDF files under the 202408 prefix and restore the
# "s3://" scheme that the glob listing omits, in sorted order.
fs = fsspec.filesystem('s3')
oisst_files = sorted(
    's3://' + f
    for f in fs.glob('s3://noaa-cdr-sea-surface-temp-optimum-interpolation-pds/data/v2.1/avhrr/202408/oisst-avhrr-v02r01.*.nc')
)

# Open each file as a virtual dataset (no indexes are built).
virtual_datasets = []
for url in oisst_files:
    virtual_datasets.append(open_virtual_dataset(url, indexes={}))

# Stack the per-file datasets along "time" without comparing coords/attrs.
concat_options = {
    'dim': 'time',
    'coords': 'minimal',
    'compat': 'override',
    'combine_attrs': 'override',
}
vds = xr.concat(virtual_datasets, **concat_options)

# Create a local icechunk repository whose config carries an "s3" virtual
# chunk container, accessed with anonymous S3 credentials.
storage = icechunk.local_filesystem_storage("./oisst")
config = icechunk.RepositoryConfig.default()
s3_container = icechunk.VirtualChunkContainer(
    "s3",
    "s3://",
    icechunk.s3_store(region="us-east-1"),
)
config.set_virtual_chunk_container(s3_container)
anonymous_s3 = icechunk.s3_credentials(anonymous=True)
credentials = icechunk.containers_credentials(s3=anonymous_s3)
repo = icechunk.Repository.create(storage, config, credentials)

# Write the virtual references on "main" and commit. The to_icechunk call is
# where the reported TypeError is raised.
session = repo.writable_session("main")
vds.virtualize.to_icechunk(
    session.store,
    last_updated_at=datetime.now(timezone.utc),
)
session.commit("Add OISST")

Error:

File ~/Developer/icechunk-xarray-demos/.venv/lib/python3.11/site-packages/virtualizarr/writers/icechunk.py:112, in dataset_to_icechunk(ds, store, group, append_dim, last_updated_at)
    106     group_object = Group.from_store(store=store_path, zarr_format=3)
    108 group_object.update_attributes(
    109     {k: encode_zarr_attr_value(v) for k, v in ds.attrs.items()}
    110 )
--> 112 return write_variables_to_icechunk_group(
    113     ds.variables,
    114     ds.attrs,
    115     store=store,
    116     group=group_object,
    117     append_dim=append_dim,
    118     last_updated_at=last_updated_at,
    119 )

File ~/Developer/icechunk-xarray-demos/.venv/lib/python3.11/site-packages/virtualizarr/writers/icechunk.py:156, in write_variables_to_icechunk_group(variables, attrs, store, group, append_dim, last_updated_at)
    154 # Then finish by writing the virtual variables to the same group
    155 for name, var in virtual_variables.items():
--> 156     write_virtual_variable_to_icechunk(
    157         store=store,
    158         group=group,
    159         name=name,
    160         var=var,
    161         append_dim=append_dim,
    162         last_updated_at=last_updated_at,
    163     )

File ~/Developer/icechunk-xarray-demos/.venv/lib/python3.11/site-packages/virtualizarr/writers/icechunk.py:246, in write_virtual_variable_to_icechunk(store, group, name, var, append_dim, last_updated_at)
    244 append_axis = None
    245 # create array if it doesn't already exist
--> 246 arr = group.require_array(
    247     name=name,
    248     shape=zarray.shape,
    249     chunks=zarray.chunks,
    250     dtype=encode_dtype(zarray.dtype),
    251     compressors=zarray._v3_codec_pipeline(),  # compressors,
    252     serializer=zarray.serializer(),
    253     dimension_names=var.dims,
    254     fill_value=zarray.fill_value,
    255 )
    257 arr.update_attributes(
    258     {k: encode_zarr_attr_value(v) for k, v in var.attrs.items()}
    259 )
    261 _encoding_keys = {"_FillValue", "missing_value", "scale_factor", "add_offset"}

File ~/Developer/icechunk-xarray-demos/.venv/lib/python3.11/site-packages/zarr/core/group.py:2441, in Group.require_array(self, name, shape, **kwargs)
   2425 def require_array(self, name: str, *, shape: ShapeLike, **kwargs: Any) -> Array:
   2426     """Obtain an array, creating if it doesn't exist.
   2427
   2428     Other `kwargs` are as per :func:`zarr.Group.create_array`.
   (...)
   2439     a : Array
   2440     """
-> 2441     return Array(self._sync(self._async_group.require_array(name, shape=shape, **kwargs)))

File ~/Developer/icechunk-xarray-demos/.venv/lib/python3.11/site-packages/zarr/core/sync.py:187, in SyncMixin._sync(self, coroutine)
    184 def _sync(self, coroutine: Coroutine[Any, Any, T]) -> T:
    185     # TODO: refactor this to to take *args and **kwargs and pass those to the method
    186     # this should allow us to better type the sync wrapper
--> 187     return sync(
    188         coroutine,
    189         timeout=config.get("async.timeout"),
    190     )

File ~/Developer/icechunk-xarray-demos/.venv/lib/python3.11/site-packages/zarr/core/sync.py:142, in sync(coro, loop, timeout)
    139 return_result = next(iter(finished)).result()
    141 if isinstance(return_result, BaseException):
--> 142     raise return_result
    143 else:
    144     return return_result

File ~/Developer/icechunk-xarray-demos/.venv/lib/python3.11/site-packages/zarr/core/sync.py:98, in _runner(coro)
     93 """
     94 Await a coroutine and return the result of running it. If awaiting the coroutine raises an
     95 exception, the exception will be returned.
     96 """
     97 try:
---> 98     return await coro
     99 except Exception as ex:
    100     return ex

File ~/Developer/icechunk-xarray-demos/.venv/lib/python3.11/site-packages/zarr/core/group.py:1259, in AsyncGroup.require_array(self, name, shape, dtype, exact, **kwargs)
   1257             raise TypeError(f"Incompatible dtype ({ds.dtype} vs {dtype})")
   1258 except KeyError:
-> 1259     ds = await self.create_array(name, shape=shape, dtype=dtype, **kwargs)
   1261 return ds

TypeError: Expected a BytesBytesCodec. Got <class 'numcodecs.zarr3.FixedScaleOffset'> instead.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions