Description
Trying to run the following code:
```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

### config ###
model_id = "NousResearch/Llama-2-7b-hf"
max_length = 512
device_map = "auto"
batch_size = 128
micro_batch_size = 32
gradient_accumulation_steps = batch_size // micro_batch_size  # 128 // 32 = 4

# "nf4" uses a symmetric quantization scheme with 4-bit precision
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# load model from huggingface
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    use_cache=False,
    device_map=device_map,
)

# load tokenizer from huggingface
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
```
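As a side note, 4-bit loading requires a CUDA-capable GPU and a CUDA-enabled bitsandbytes build. A minimal environment sanity check (a sketch, not part of the original report; it assumes only that torch and bitsandbytes are installed):

```python
# Minimal sanity check for the 4-bit loading prerequisites.
import torch
from importlib.metadata import version

print(torch.__version__, torch.cuda.is_available())  # CUDA must be available for 4-bit
print(version("bitsandbytes"))                       # bitsandbytes build in use
```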
but get the following error:
```
AttributeError Traceback (most recent call last)
Cell In[7], line 18
10 bnb_config = BitsAndBytesConfig(
11 load_in_4bit=True,
12 bnb_4bit_use_double_quant=True,
13 bnb_4bit_quant_type="nf4",
14 bnb_4bit_compute_dtype=torch.bfloat16
15 )
17 # load model from huggingface
---> 18 model = AutoModelForCausalLM.from_pretrained(
19 model_id,
20 quantization_config=bnb_config,
21 use_cache=False,
22 device_map=device_map
23 )
25 # load tokenizer from huggingface
26 tokenizer = AutoTokenizer.from_pretrained(model_id)
File /opt/conda/envs/pytorch/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py:484, in _BaseAutoModelClass.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
482 elif type(config) in cls._model_mapping.keys():
483 model_class = _get_model_class(config, cls._model_mapping)
--> 484 return model_class.from_pretrained(
485 pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
486 )
487 raise ValueError(
488 f"Unrecognized configuration class {config.class} for this kind of AutoModel: {cls.name}.\n"
489 f"Model type should be one of {', '.join(c.name for c in cls._model_mapping.keys())}."
490 )
File /opt/conda/envs/pytorch/lib/python3.10/site-packages/transformers/modeling_utils.py:2881, in PreTrainedModel.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
2871 if dtype_orig is not None:
2872 torch.set_default_dtype(dtype_orig)
2874 (
2875 model,
2876 missing_keys,
2877 unexpected_keys,
2878 mismatched_keys,
2879 offload_index,
2880 error_msgs,
-> 2881 ) = cls._load_pretrained_model(
2882 model,
2883 state_dict,
2884 loaded_state_dict_keys, # XXX: rename?
2885 resolved_archive_file,
2886 pretrained_model_name_or_path,
2887 ignore_mismatched_sizes=ignore_mismatched_sizes,
2888 sharded_metadata=sharded_metadata,
2889 _fast_init=_fast_init,
2890 low_cpu_mem_usage=low_cpu_mem_usage,
2891 device_map=device_map,
2892 offload_folder=offload_folder,
2893 offload_state_dict=offload_state_dict,
2894 dtype=torch_dtype,
2895 is_quantized=(load_in_8bit or load_in_4bit),
2896 keep_in_fp32_modules=keep_in_fp32_modules,
2897 )
2899 model.is_loaded_in_4bit = load_in_4bit
2900 model.is_loaded_in_8bit = load_in_8bit
File /opt/conda/envs/pytorch/lib/python3.10/site-packages/transformers/modeling_utils.py:3228, in PreTrainedModel._load_pretrained_model(cls, model, state_dict, loaded_keys, resolved_archive_file, pretrained_model_name_or_path, ignore_mismatched_sizes, sharded_metadata, _fast_init, low_cpu_mem_usage, device_map, offload_folder, offload_state_dict, dtype, is_quantized, keep_in_fp32_modules)
3218 mismatched_keys += _find_mismatched_keys(
3219 state_dict,
3220 model_state_dict,
(...)
3224 ignore_mismatched_sizes,
3225 )
3227 if low_cpu_mem_usage:
-> 3228 new_error_msgs, offload_index, state_dict_index = _load_state_dict_into_meta_model(
3229 model_to_load,
3230 state_dict,
3231 loaded_keys,
3232 start_prefix,
3233 expected_keys,
3234 device_map=device_map,
3235 offload_folder=offload_folder,
3236 offload_index=offload_index,
3237 state_dict_folder=state_dict_folder,
3238 state_dict_index=state_dict_index,
3239 dtype=dtype,
3240 is_quantized=is_quantized,
3241 is_safetensors=is_safetensors,
3242 keep_in_fp32_modules=keep_in_fp32_modules,
3243 )
3244 error_msgs += new_error_msgs
3245 else:
File /opt/conda/envs/pytorch/lib/python3.10/site-packages/transformers/modeling_utils.py:728, in _load_state_dict_into_meta_model(model, state_dict, loaded_state_dict_keys, start_prefix, expected_keys, device_map, offload_folder, offload_index, state_dict_folder, state_dict_index, dtype, is_quantized, is_safetensors, keep_in_fp32_modules)
725 fp16_statistics = None
727 if "SCB" not in param_name:
--> 728 set_module_quantized_tensor_to_device(
729 model, param_name, param_device, value=param, fp16_statistics=fp16_statistics
730 )
732 return error_msgs, offload_index, state_dict_index
File /opt/conda/envs/pytorch/lib/python3.10/site-packages/transformers/utils/bitsandbytes.py:91, in set_module_quantized_tensor_to_device(module, tensor_name, device, value, fp16_statistics)
89 new_value = bnb.nn.Int8Params(new_value, requires_grad=False, **kwargs).to(device)
90 elif is_4bit:
---> 91 new_value = bnb.nn.Params4bit(new_value, requires_grad=False, **kwargs).to(device)
93 module._parameters[tensor_name] = new_value
94 if fp16_statistics is not None:
File /opt/conda/envs/pytorch/lib/python3.10/site-packages/bitsandbytes/nn/modules.py:178, in Params4bit.to(self, *args, **kwargs)
175 device, dtype, non_blocking, convert_to_format = torch._C._nn._parse_to(*args, **kwargs)
177 if (device is not None and device.type == "cuda" and self.data.device.type == "cpu"):
--> 178 return self.cuda(device)
179 else:
180 s = self.quant_state
File /opt/conda/envs/pytorch/lib/python3.10/site-packages/bitsandbytes/nn/modules.py:156, in Params4bit.cuda(self, device)
154 def cuda(self, device):
155 w = self.data.contiguous().half().cuda(device)
--> 156 w_4bit, quant_state = bnb.functional.quantize_4bit(w, blocksize=self.blocksize, compress_statistics=self.compress_statistics, quant_type=self.quant_type)
157 self.data = w_4bit
158 self.quant_state = quant_state
File /opt/conda/envs/pytorch/lib/python3.10/site-packages/bitsandbytes/functional.py:832, in quantize_4bit(A, absmax, out, blocksize, compress_statistics, quant_type)
830 lib.cquantize_blockwise_fp16_fp4(get_ptr(None), get_ptr(A), get_ptr(absmax), get_ptr(out), ct.c_int32(blocksize), ct.c_int(n))
831 else:
--> 832 lib.cquantize_blockwise_fp16_nf4(get_ptr(None), get_ptr(A), get_ptr(absmax), get_ptr(out), ct.c_int32(blocksize), ct.c_int(n))
833 elif A.dtype == torch.bfloat16:
834 if quant_type == 'fp4':
AttributeError: 'NoneType' object has no attribute 'cquantize_blockwise_fp16_nf4'
```
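For context, the traceback bottoms out in `bitsandbytes/functional.py`, where the native-library handle `lib` is `None`; any `lib.c...` lookup then raises exactly this `AttributeError`. That usually means the compiled bitsandbytes CUDA binary was never loaded (a CPU-only build or a CUDA version mismatch), not a bug in the Transformers code above. A quick way to confirm (a sketch; it assumes the 0.39/0.40-era module layout shown in the paths above, where `functional.py` uses a module-level `lib` handle):

```python
# If this prints None, the bitsandbytes native CUDA library did not load,
# which reproduces the failure mode in the traceback. (Assumes the module
# layout from the traceback above; `lib` may live elsewhere in other versions.)
from bitsandbytes import functional as F

print(F.lib)
```

Running `python -m bitsandbytes` (available in recent releases) also prints a full CUDA setup diagnostic that shows which binary, if any, was found.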
