-
Notifications
You must be signed in to change notification settings - Fork 434
SimpleCacheFileSystem does not handle "a" or "r+" properly #1977
Copy link
Copy link
Closed
Description
Currently, SimpleCacheFileSystem only get the file from underlying file system when the mode is reading ("r"). However, for appending ("a") mode, we also need to download the file, but no error should be raised if the file does not exist.
Besides, SimpleCacheFileSystem should return a LocalTempFile instance if the mode is reading and writing ("r+") because the file can be updated.
Related code:
filesystem_spec/fsspec/implementations/cached.py
Lines 897 to 944 in 635b2ef
| def _open(self, path, mode="rb", **kwargs): | |
| path = self._strip_protocol(path) | |
| sha = self._mapper(path) | |
| if "r" not in mode: | |
| fn = os.path.join(self.storage[-1], sha) | |
| user_specified_kwargs = { | |
| k: v | |
| for k, v in kwargs.items() | |
| if k not in ["autocommit", "block_size", "cache_options"] | |
| } # those were added by open() | |
| return LocalTempFile( | |
| self, | |
| path, | |
| mode=mode, | |
| autocommit=not self._intrans, | |
| fn=fn, | |
| **user_specified_kwargs, | |
| ) | |
| fn = self._check_file(path) | |
| if fn: | |
| return open(fn, mode) | |
| fn = os.path.join(self.storage[-1], sha) | |
| logger.debug("Copying %s to local cache", path) | |
| kwargs["mode"] = mode | |
| self._mkcache() | |
| self._cache_size = None | |
| if self.compression: | |
| with self.fs._open(path, **kwargs) as f, open(fn, "wb") as f2: | |
| if isinstance(f, AbstractBufferedFile): | |
| # want no type of caching if just downloading whole thing | |
| f.cache = BaseCache(0, f.cache.fetcher, f.size) | |
| comp = ( | |
| infer_compression(path) | |
| if self.compression == "infer" | |
| else self.compression | |
| ) | |
| f = compr[comp](f, mode="rb") | |
| data = True | |
| while data: | |
| block = getattr(f, "blocksize", 5 * 2**20) | |
| data = f.read(block) | |
| f2.write(data) | |
| else: | |
| self.fs.get_file(path, fn) | |
| return self._open(path, mode) |
Suggested changes (simplified, without compression):
def _open(self, path, mode="rb", **kwargs):
path = self._strip_protocol(path)
sha = self._mapper(path)
# For read and append, (try) download from remote
if "r" in mode or "a" in mode:
fn = self._check_file(path)
if fn:
return open(fn, mode)
fn = os.path.join(self.storage[-1], sha)
self._mkcache()
self._cache_size = None
# append does not require an existing file but read does
if self.fs.exists(path):
self.fs.get_file(path, fn)
elif "r" in mode:
raise FileNotFoundError(path)
# just reading does not need special file handling
if "r" in mode and "+" not in mode:
return open(fn, mode)
fn = os.path.join(self.storage[-1], sha)
user_specified_kwargs = {
k: v
for k, v in kwargs.items()
if k not in ["autocommit", "block_size", "cache_options"]
} # those were added by open()
return LocalTempFile(
self,
path,
mode=mode,
autocommit=not self._intrans,
fn=fn,
**user_specified_kwargs,
)Related docs:
I'm willing to create a PR if the maintainers like the changes.
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels