-
Notifications
You must be signed in to change notification settings - Fork 674
Description
System information
- Ubuntu 19.04
- Modin installed via: pipenv install modin
- Modin version: 0.5.0
- Python version: 3.7.3
- Exact command to reproduce: pd.read_parquet('parquet-out/foo/')
Describe the problem
It seems that read_parquet cannot read an entire directory of parquet files, i.e., a partitioned parquet directory.
Source code / logs
~/.local/share/virtualenvs/foo-cFmMv4Vf/lib/python3.7/site-packages/modin/pandas/io.py in read_parquet(path, engine, columns, **kwargs)
27 return DataFrame(
28 query_compiler=BaseFactory.read_parquet(
---> 29 path=path, columns=columns, engine=engine, **kwargs
30 )
31 )
~/.local/share/virtualenvs/foo-cFmMv4Vf/lib/python3.7/site-packages/modin/data_management/factories.py in read_parquet(cls, **kwargs)
49 @classmethod
50 def read_parquet(cls, **kwargs):
---> 51 return cls._determine_engine()._read_parquet(**kwargs)
52
53 @classmethod
~/.local/share/virtualenvs/foo-cFmMv4Vf/lib/python3.7/site-packages/modin/data_management/factories.py in _read_parquet(cls, **kwargs)
53 @classmethod
54 def _read_parquet(cls, **kwargs):
---> 55 return cls.io_cls.read_parquet(**kwargs)
56
57 @classmethod
~/.local/share/virtualenvs/foo-cFmMv4Vf/lib/python3.7/site-packages/modin/engines/ray/generic/io.py in read_parquet(cls, path, engine, columns, **kwargs)
149
150 if not columns:
--> 151 pf = ParquetFile(path, memory_map=False)
152 columns = [
153 name
~/.local/share/virtualenvs/foo-cFmMv4Vf/lib/python3.7/site-packages/ray/pyarrow_files/pyarrow/parquet.py in __init__(self, source, metadata, common_metadata, memory_map)
127 memory_map=True):
128 self.reader = ParquetReader()
--> 129 self.reader.open(source, use_memory_map=memory_map, metadata=metadata)
130 self.common_metadata = common_metadata
131 self._nested_paths_by_prefix = self._build_nested_paths()
~/.local/share/virtualenvs/foo-cFmMv4Vf/lib/python3.7/site-packages/ray/pyarrow_files/pyarrow/_parquet.pyx in pyarrow._parquet.ParquetReader.open()
~/.local/share/virtualenvs/foo-cFmMv4Vf/lib/python3.7/site-packages/ray/pyarrow_files/pyarrow/error.pxi in pyarrow.lib.check_status()
ArrowIOError: Arrow error: IOError: Error reading bytes from file: Is a directory