-
-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Closed
Description
dask version 0.9.0, pandas 0.18.1 (most recent from conda as of posting)
Grab a 1 MB fake TAQ file from here. (Aside: the same data is ~1/4 MB in zipped fixed-width format — the chunk sizes are probably dumb for this data.)
pd.read_hdf('small_test_data_public.h5', '/IXQAJE/no_suffix') works, but dask.dataframe.read_hdf('small_test_data_public.h5', '/IXQAJE/no_suffix') fails with the following stack trace. I think it may be due to the attempt to read a 0-length (empty) DataFrame, i.e. the pd.read_hdf(path, key, stop=0) call shown in the trace. If I read the intent correctly, it would probably make more sense to retrieve a PyTables or h5py object, which would provide the desired metadata without the weirdness around a 0-length read.
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-110-8ad7ff0d4733> in <module>()
----> 1 spy_dd = dd.read_hdf(fname, max_sym)
/home/dav/miniconda3/envs/TAQ/lib/python3.5/site-packages/dask/dataframe/io.py in read_hdf(pattern, key, start, stop, columns, chunksize, lock)
559 columns=columns, chunksize=chunksize,
560 lock=lock)
--> 561 for path in paths])
562
563
/home/dav/miniconda3/envs/TAQ/lib/python3.5/site-packages/dask/dataframe/io.py in <listcomp>(.0)
559 columns=columns, chunksize=chunksize,
560 lock=lock)
--> 561 for path in paths])
562
563
/home/dav/miniconda3/envs/TAQ/lib/python3.5/site-packages/dask/dataframe/io.py in _read_single_hdf(path, key, start, stop, columns, chunksize, lock)
499 from .multi import concat
500 return concat([one_path_one_key(path, k, start, s, columns, chunksize, lock)
--> 501 for k, s in zip(keys, stops)])
502
503
/home/dav/miniconda3/envs/TAQ/lib/python3.5/site-packages/dask/dataframe/io.py in <listcomp>(.0)
499 from .multi import concat
500 return concat([one_path_one_key(path, k, start, s, columns, chunksize, lock)
--> 501 for k, s in zip(keys, stops)])
502
503
/home/dav/miniconda3/envs/TAQ/lib/python3.5/site-packages/dask/dataframe/io.py in one_path_one_key(path, key, start, stop, columns, chunksize, lock)
474 not contain any wildcards).
475 """
--> 476 empty = pd.read_hdf(path, key, stop=0)
477 if columns is not None:
478 empty = empty[columns]
/home/dav/miniconda3/envs/TAQ/lib/python3.5/site-packages/pandas/io/pytables.py in read_hdf(path_or_buf, key, **kwargs)
328 'multiple datasets.')
329 key = keys[0]
--> 330 return store.select(key, auto_close=auto_close, **kwargs)
331 except:
332 # if there is an error, close the store
/home/dav/miniconda3/envs/TAQ/lib/python3.5/site-packages/pandas/io/pytables.py in select(self, key, where, start, stop, columns, iterator, chunksize, auto_close, **kwargs)
678 chunksize=chunksize, auto_close=auto_close)
679
--> 680 return it.get_result()
681
682 def select_as_coordinates(
/home/dav/miniconda3/envs/TAQ/lib/python3.5/site-packages/pandas/io/pytables.py in get_result(self, coordinates)
1362
1363 # directly return the result
-> 1364 results = self.func(self.start, self.stop, where)
1365 self.close()
1366 return results
/home/dav/miniconda3/envs/TAQ/lib/python3.5/site-packages/pandas/io/pytables.py in func(_start, _stop, _where)
671 return s.read(start=_start, stop=_stop,
672 where=_where,
--> 673 columns=columns, **kwargs)
674
675 # create the iterator
/home/dav/miniconda3/envs/TAQ/lib/python3.5/site-packages/pandas/io/pytables.py in read(self, where, columns, **kwargs)
4052
4053 block = make_block(values, placement=np.arange(len(cols_)))
-> 4054 mgr = BlockManager([block], [cols_, index_])
4055 frames.append(DataFrame(mgr))
4056
/home/dav/miniconda3/envs/TAQ/lib/python3.5/site-packages/pandas/core/internals.py in __init__(self, blocks, axes, do_integrity_check, fastpath)
2592
2593 if do_integrity_check:
-> 2594 self._verify_integrity()
2595
2596 self._consolidate_check()
/home/dav/miniconda3/envs/TAQ/lib/python3.5/site-packages/pandas/core/internals.py in _verify_integrity(self)
2802 for block in self.blocks:
2803 if block._verify_integrity and block.shape[1:] != mgr_shape[1:]:
-> 2804 construction_error(tot_items, block.shape[1:], self.axes)
2805 if len(self.items) != tot_items:
2806 raise AssertionError('Number of manager items must equal union of '
/home/dav/miniconda3/envs/TAQ/lib/python3.5/site-packages/pandas/core/internals.py in construction_error(tot_items, block_shape, axes, e)
3966 raise e
3967 if block_shape[0] == 0:
-> 3968 raise ValueError("Empty data passed with indices specified.")
3969 raise ValueError("Shape of passed values is {0}, indices imply {1}".format(
3970 passed, implied))
ValueError: Empty data passed with indices specified.
Reactions are currently unavailable