Description
Trying to run the following code:
```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

### config ###
model_id = "NousResearch/Llama-2-7b-hf"
max_length = 512
device_map = "auto"
batch_size = 128
micro_batch_size = 32
gradient_accumulation_steps = batch_size // micro_batch_size  # 128 // 32 = 4

# "nf4" uses a symmetric quantization scheme with 4-bit precision
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# load model from huggingface
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    use_cache=False,
    device_map=device_map,
)

# load tokenizer from huggingface
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
```
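As a side note, 4-bit loading requires a CUDA-capable GPU and a CUDA-enabled bitsandbytes build. A minimal environment sanity check (a sketch, not part of the original report; it assumes only that torch and bitsandbytes are installed):

```python
# Minimal sanity check for the 4-bit loading prerequisites.
import torch
from importlib.metadata import version

print(torch.__version__, torch.cuda.is_available())  # CUDA must be available for 4-bit
print(version("bitsandbytes"))                       # bitsandbytes build in use
```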
but get the following error:
```
AttributeError Traceback (most recent call last)
Cell In[7], line 18
10 bnb_config = BitsAndBytesConfig(
11 load_in_4bit=True,
12 bnb_4bit_use_double_quant=True,
13 bnb_4bit_quant_type="nf4",
14 bnb_4bit_compute_dtype=torch.bfloat16
15 )
17 # load model from huggingface
---> 18 model = AutoModelForCausalLM.from_pretrained(
19 model_id,
20 quantization_config=bnb_config,
21 use_cache=False,
22 device_map=device_map
23 )
25 # load tokenizer from huggingface
26 tokenizer = AutoTokenizer.from_pretrained(model_id)
File /opt/conda/envs/pytorch/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py:484, in _BaseAutoModelClass.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
482 elif type(config) in cls._model_mapping.keys():
483 model_class = _get_model_class(config, cls._model_mapping)
--> 484 return model_class.from_pretrained(
485 pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
486 )
487 raise ValueError(
488 f"Unrecognized configuration class {config.class} for this kind of AutoModel: {cls.name}.\n"
489 f"Model type should be one of {', '.join(c.name for c in cls._model_mapping.keys())}."
490 )
File /opt/conda/envs/pytorch/lib/python3.10/site-packages/transformers/modeling_utils.py:2881, in PreTrainedModel.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
2871 if dtype_orig is not None:
2872 torch.set_default_dtype(dtype_orig)
2874 (
2875 model,
2876 missing_keys,
2877 unexpected_keys,
2878 mismatched_keys,
2879 offload_index,
2880 error_msgs,
-> 2881 ) = cls._load_pretrained_model(
2882 model,
2883 state_dict,
2884 loaded_state_dict_keys, # XXX: rename?
2885 resolved_archive_file,
2886 pretrained_model_name_or_path,
2887 ignore_mismatched_sizes=ignore_mismatched_sizes,
2888 sharded_metadata=sharded_metadata,
2889 _fast_init=_fast_init,
2890 low_cpu_mem_usage=low_cpu_mem_usage,
2891 device_map=device_map,
2892 offload_folder=offload_folder,
2893 offload_state_dict=offload_state_dict,
2894 dtype=torch_dtype,
2895 is_quantized=(load_in_8bit or load_in_4bit),
2896 keep_in_fp32_modules=keep_in_fp32_modules,
2897 )
2899 model.is_loaded_in_4bit = load_in_4bit
2900 model.is_loaded_in_8bit = load_in_8bit
File /opt/conda/envs/pytorch/lib/python3.10/site-packages/transformers/modeling_utils.py:3228, in PreTrainedModel._load_pretrained_model(cls, model, state_dict, loaded_keys, resolved_archive_file, pretrained_model_name_or_path, ignore_mismatched_sizes, sharded_metadata, _fast_init, low_cpu_mem_usage, device_map, offload_folder, offload_state_dict, dtype, is_quantized, keep_in_fp32_modules)
3218 mismatched_keys += _find_mismatched_keys(
3219 state_dict,
3220 model_state_dict,
(...)
3224 ignore_mismatched_sizes,
3225 )
3227 if low_cpu_mem_usage:
-> 3228 new_error_msgs, offload_index, state_dict_index = _load_state_dict_into_meta_model(
3229 model_to_load,
3230 state_dict,
3231 loaded_keys,
3232 start_prefix,
3233 expected_keys,
3234 device_map=device_map,
3235 offload_folder=offload_folder,
3236 offload_index=offload_index,
3237 state_dict_folder=state_dict_folder,
3238 state_dict_index=state_dict_index,
3239 dtype=dtype,
3240 is_quantized=is_quantized,
3241 is_safetensors=is_safetensors,
3242 keep_in_fp32_modules=keep_in_fp32_modules,
3243 )
3244 error_msgs += new_error_msgs
3245 else:
File /opt/conda/envs/pytorch/lib/python3.10/site-packages/transformers/modeling_utils.py:728, in _load_state_dict_into_meta_model(model, state_dict, loaded_state_dict_keys, start_prefix, expected_keys, device_map, offload_folder, offload_index, state_dict_folder, state_dict_index, dtype, is_quantized, is_safetensors, keep_in_fp32_modules)
725 fp16_statistics = None
727 if "SCB" not in param_name:
--> 728 set_module_quantized_tensor_to_device(
729 model, param_name, param_device, value=param, fp16_statistics=fp16_statistics
730 )
732 return error_msgs, offload_index, state_dict_index
File /opt/conda/envs/pytorch/lib/python3.10/site-packages/transformers/utils/bitsandbytes.py:91, in set_module_quantized_tensor_to_device(module, tensor_name, device, value, fp16_statistics)
89 new_value = bnb.nn.Int8Params(new_value, requires_grad=False, **kwargs).to(device)
90 elif is_4bit:
---> 91 new_value = bnb.nn.Params4bit(new_value, requires_grad=False, **kwargs).to(device)
93 module._parameters[tensor_name] = new_value
94 if fp16_statistics is not None:
File /opt/conda/envs/pytorch/lib/python3.10/site-packages/bitsandbytes/nn/modules.py:178, in Params4bit.to(self, *args, **kwargs)
175 device, dtype, non_blocking, convert_to_format = torch._C._nn._parse_to(*args, **kwargs)
177 if (device is not None and device.type == "cuda" and self.data.device.type == "cpu"):
--> 178 return self.cuda(device)
179 else:
180 s = self.quant_state
File /opt/conda/envs/pytorch/lib/python3.10/site-packages/bitsandbytes/nn/modules.py:156, in Params4bit.cuda(self, device)
154 def cuda(self, device):
155 w = self.data.contiguous().half().cuda(device)
--> 156 w_4bit, quant_state = bnb.functional.quantize_4bit(w, blocksize=self.blocksize, compress_statistics=self.compress_statistics, quant_type=self.quant_type)
157 self.data = w_4bit
158 self.quant_state = quant_state
File /opt/conda/envs/pytorch/lib/python3.10/site-packages/bitsandbytes/functional.py:832, in quantize_4bit(A, absmax, out, blocksize, compress_statistics, quant_type)
830 lib.cquantize_blockwise_fp16_fp4(get_ptr(None), get_ptr(A), get_ptr(absmax), get_ptr(out), ct.c_int32(blocksize), ct.c_int(n))
831 else:
--> 832 lib.cquantize_blockwise_fp16_nf4(get_ptr(None), get_ptr(A), get_ptr(absmax), get_ptr(out), ct.c_int32(blocksize), ct.c_int(n))
833 elif A.dtype == torch.bfloat16:
834 if quant_type == 'fp4':
AttributeError: 'NoneType' object has no attribute 'cquantize_blockwise_fp16_nf4'
```
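For context, the traceback bottoms out in `bitsandbytes/functional.py`, where the native-library handle `lib` is `None`; any `lib.c...` lookup then raises exactly this `AttributeError`. That usually means the compiled bitsandbytes CUDA binary was never loaded (a CPU-only build or a CUDA version mismatch), not a bug in the Transformers code above. A quick way to confirm (a sketch; it assumes the 0.39/0.40-era module layout shown in the paths above, where `functional.py` uses a module-level `lib` handle):

```python
# If this prints None, the bitsandbytes native CUDA library did not load,
# which reproduces the failure mode in the traceback. (Assumes the module
# layout from the traceback above; `lib` may live elsewhere in other versions.)
from bitsandbytes import functional as F

print(F.lib)
```

Running `python -m bitsandbytes` (available in recent releases) also prints a full CUDA setup diagnostic that shows which binary, if any, was found.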
