-
Notifications
You must be signed in to change notification settings - Fork 27.3k
Description
🐛 Describe the bug
Some of my package's tests have suddenly started failing on GitHub Actions, but only with Python 3.11 (not 3.10) and only on Linux (not on macOS with an M1 chip). Specifically, they fail when I try to load a state dict that was pickled with torch.save:
data = torch.load('<saved_dict.pth.tar>', map_location=torch.device('cpu'))
The two problematic files are here: https://github.com/DeepRank/deeprank2/tree/dev/tests/data/pretrained
Here is the failed action: https://github.com/DeepRank/deeprank2/actions/runs/7400017804/job/20132736281?pr=515, where you can also see that the same tests do not fail with Python 3.10.
Note that it may be something similar to issue #80831
Error message:
tests/test_trainer.py:686:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/usr/share/miniconda/envs/__setup_conda/lib/python3.11/site-packages/deeprank2/dataset.py:787: in __init__
self._check_and_inherit_train(GraphDataset, self.inherited_params)
/usr/share/miniconda/envs/__setup_conda/lib/python3.11/site-packages/deeprank2/dataset.py:93: in _check_and_inherit_train
data = torch.load(self.train_source, map_location=torch.device('cpu'))
/usr/share/miniconda/envs/__setup_conda/lib/python3.11/site-packages/torch/serialization.py:809: in load
return _load(opened_zipfile, map_location, pickle_module, **pickle_load_args)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
zip_file = <torch.PyTorchFileReader object at 0x7f80062597b0>
map_location = device(type='cpu')
pickle_module = <module 'pickle' from '/usr/share/miniconda/envs/__setup_conda/lib/python3.11/pickle.py'>
pickle_file = 'data.pkl', pickle_load_args = {'encoding': 'utf-8'}
persistent_load = <function _load.<locals>.persistent_load at 0x7f801ddf2fc0>
UnpicklerWrapper = <class 'torch.serialization._load.<locals>.UnpicklerWrapper'>
data_file = <_io.BytesIO object at 0x7f800510e0c0>
unpickler = <torch.serialization._load.<locals>.UnpicklerWrapper object at 0x7f80063e07d0>
def _load(zip_file, map_location, pickle_module, pickle_file='data.pkl', **pickle_load_args):
restore_location = _get_restore_location(map_location)
loaded_storages = {}
def load_tensor(dtype, numel, key, location):
name = f'data/{key}'
storage = zip_file.get_storage_from_record(name, numel, torch.UntypedStorage)._typed_storage()._untyped_storage
# TODO: Once we decide to break serialization FC, we can
# stop wrapping with TypedStorage
typed_storage = torch.storage.TypedStorage(
wrap_storage=restore_location(storage, location),
dtype=dtype,
_internal=True)
if typed_storage._data_ptr() != 0:
loaded_storages[key] = typed_storage
return typed_storage
def persistent_load(saved_id):
assert isinstance(saved_id, tuple)
typename = _maybe_decode_ascii(saved_id[0])
data = saved_id[1:]
assert typename == 'storage', \
f"Unknown typename for persistent_load, expected 'storage' but got '{typename}'"
storage_type, key, location, numel = data
if storage_type is torch.UntypedStorage:
dtype = torch.uint8
else:
dtype = storage_type.dtype
if key in loaded_storages:
typed_storage = loaded_storages[key]
else:
nbytes = numel * torch._utils._element_size(dtype)
typed_storage = load_tensor(dtype, nbytes, key, _maybe_decode_ascii(location))
return typed_storage
load_module_mapping: Dict[str, str] = {
# See https://github.com/pytorch/pytorch/pull/51633
'torch.tensor': 'torch._tensor'
}
# Need to subclass Unpickler instead of directly monkey-patching the find_class method
# because it's marked readonly in pickle.
# The type: ignore is because mypy can't statically determine the type of this class.
class UnpicklerWrapper(pickle_module.Unpickler): # type: ignore[name-defined]
# from https://stackoverflow.com/questions/13398462/unpickling-python-objects-with-a-changed-module-path/13405732
# Lets us override the imports that pickle uses when unpickling an object.
# This is useful for maintaining BC if we change a module path that tensor instantiation relies on.
def find_class(self, mod_name, name):
if type(name) is str and 'Storage' in name:
try:
return StorageType(name)
except KeyError:
pass
mod_name = load_module_mapping.get(mod_name, mod_name)
return super().find_class(mod_name, name)
# Load the data (which may in turn use `persistent_load` to load tensors)
data_file = io.BytesIO(zip_file.get_record(pickle_file))
unpickler = UnpicklerWrapper(data_file, **pickle_load_args)
unpickler.persistent_load = persistent_load
> result = unpickler.load()
E AttributeError: 'Adam' object has no attribute 'param_groups'
/usr/share/miniconda/envs/__setup_conda/lib/python3.11/site-packages/torch/serialization.py:1172: AttributeError
Versions
PyTorch version: 2.0.1
Is debug build: False
OS: Ubuntu 22.04.3, LTS
Image version: 20231217.2.0
Included Software: https://github.com/actions/runner-images/blob/ubuntu22/20231217.2/images/ubuntu/Ubuntu2204-Readme.md
Image Release: https://github.com/actions/runner-images/releases/tag/ubuntu22%2F20231217.2
Conda version: 23.11.0
Python version: 3.11.5
Is CUDA available: False
Versions of relevant libraries:
[conda] pytorch 2.0.1 py3.11_cpu_0 pytorch
[conda] pytorch-mutex 1.0 cpu pytorch
[conda] torchaudio 2.0.2 py311_cpu pytorch
[conda] torchvision 0.15.2 py311_cpu pytorch
cc @vincentqb @jbschlosser @albanD @janeyx99 @crcrpar @mruberry @mikaylagawarecki