Currently, SimpleCacheFileSystem only get the file from underlying file system when the mode is reading ("r"). However, for appending ("a") mode, we also need to download the file, but no error should be raised if the file does not exist.
Besides, SimpleCacheFileSystem should return a LocalTempFile instance if the mode is reading and writing ("r+") because the file can be updated.
Related code:
|
def _open(self, path, mode="rb", **kwargs): |
|
path = self._strip_protocol(path) |
|
sha = self._mapper(path) |
|
|
|
if "r" not in mode: |
|
fn = os.path.join(self.storage[-1], sha) |
|
user_specified_kwargs = { |
|
k: v |
|
for k, v in kwargs.items() |
|
if k not in ["autocommit", "block_size", "cache_options"] |
|
} # those were added by open() |
|
return LocalTempFile( |
|
self, |
|
path, |
|
mode=mode, |
|
autocommit=not self._intrans, |
|
fn=fn, |
|
**user_specified_kwargs, |
|
) |
|
fn = self._check_file(path) |
|
if fn: |
|
return open(fn, mode) |
|
|
|
fn = os.path.join(self.storage[-1], sha) |
|
logger.debug("Copying %s to local cache", path) |
|
kwargs["mode"] = mode |
|
|
|
self._mkcache() |
|
self._cache_size = None |
|
if self.compression: |
|
with self.fs._open(path, **kwargs) as f, open(fn, "wb") as f2: |
|
if isinstance(f, AbstractBufferedFile): |
|
# want no type of caching if just downloading whole thing |
|
f.cache = BaseCache(0, f.cache.fetcher, f.size) |
|
comp = ( |
|
infer_compression(path) |
|
if self.compression == "infer" |
|
else self.compression |
|
) |
|
f = compr[comp](f, mode="rb") |
|
data = True |
|
while data: |
|
block = getattr(f, "blocksize", 5 * 2**20) |
|
data = f.read(block) |
|
f2.write(data) |
|
else: |
|
self.fs.get_file(path, fn) |
|
return self._open(path, mode) |
Suggested changes (simplified, without compression):
def _open(self, path, mode="rb", **kwargs):
path = self._strip_protocol(path)
sha = self._mapper(path)
# For read and append, (try) download from remote
if "r" in mode or "a" in mode:
fn = self._check_file(path)
if fn:
return open(fn, mode)
fn = os.path.join(self.storage[-1], sha)
self._mkcache()
self._cache_size = None
# append does not require an existing file but read does
if self.fs.exists(path):
self.fs.get_file(path, fn)
elif "r" in mode:
raise FileNotFoundError(path)
# just reading does not need special file handling
if "r" in mode and "+" not in mode:
return open(fn, mode)
fn = os.path.join(self.storage[-1], sha)
user_specified_kwargs = {
k: v
for k, v in kwargs.items()
if k not in ["autocommit", "block_size", "cache_options"]
} # those were added by open()
return LocalTempFile(
self,
path,
mode=mode,
autocommit=not self._intrans,
fn=fn,
**user_specified_kwargs,
)
Related docs:
I'm willing to create a PR if the maintainers like the changes.
Currently,
SimpleCacheFileSystemonly get the file from underlying file system when the mode is reading ("r"). However, for appending ("a") mode, we also need to download the file, but no error should be raised if the file does not exist.Besides,
SimpleCacheFileSystemshould return aLocalTempFileinstance if the mode is reading and writing ("r+") because the file can be updated.Related code:
filesystem_spec/fsspec/implementations/cached.py
Lines 897 to 944 in 635b2ef
Suggested changes (simplified, without compression):
Related docs:
I'm willing to create a PR if the maintainers like the changes.