I have fine-tuned (FT) gemma-2b-it on a Q&A dataset and saved the LoRA adapters.
One thing I want to know is:
import torch
from unsloth import FastLanguageModel

# Load from the directory holding the LoRA adapters saved after training.
# NOTE(review): presumably Unsloth reads the base checkpoint name from the
# adapter config and attaches the LoRA weights on top of it rather than
# merging them into the base weights — confirm against the Unsloth docs.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="lora_gemma2b-it-model",
    max_seq_length=2048,
    dtype=torch.float16,
    load_in_4bit=False,
)

# Switch the model to Unsloth's inference mode (upstream comment: "Enable
# native 2x faster inference").
FastLanguageModel.for_inference(model)
1. Does this snippet from the Gemma fine-tuning (FT) example load the adapters and merge them with the base model behind the scenes?
2. I have loaded gemma-2b-it in float16; here are the configs:
from unsloth import FastLanguageModel
import torch
# Settings used when loading the base model.

# Choose any! We auto support RoPE Scaling internally!
max_seq_length = 2048

# None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
dtype = torch.float16

# Use 4bit quantization to reduce memory usage. Can be False.
load_in_4bit = False

# Base-model load call these settings feed into (kept commented out, as in the
# original; the access token has been replaced by a placeholder).
# model, tokenizer = FastLanguageModel.from_pretrained(
#     model_name = "google/gemma-2b-it",
#     max_seq_length = max_seq_length,
#     dtype = dtype,
#     load_in_4bit = load_in_4bit,
#     token = "<HF_TOKEN>", # use one if using gated models like meta-llama/Llama-2-7b-hf
# )
I then merge the adapters into the base model using:
from peft import LoraConfig, PeftModel, get_peft_model
from transformers import AutoModelForCausalLM

# Directory with the saved LoRA adapters, and the base checkpoint they are
# applied to.
adapter = "lora_gemma2b-it-model"
model_name = "google/gemma-2b-it"

# Load the base model in float16 from the "float16" revision of the repo.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    revision="float16",
)

# Attach the adapters, then fold them into the base weights so that a plain
# (non-PEFT) model is what gets written to disk.
model = PeftModel.from_pretrained(model, adapter).merge_and_unload()
model.save_pretrained("./naive_merge/", safe_serialization=True)
# Inference: reload the merged checkpoint and move it to the GPU.
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained(
    "naive_merge",
    # token = "<HF_TOKEN>",  # redacted: do not paste real access tokens
    torch_dtype=torch.float16,
).to("cuda")

# NOTE(review): the tokenizer is read from "merged_model4" while the weights
# above come from "naive_merge" — confirm this directory is the intended one.
tokenizer = AutoTokenizer.from_pretrained("merged_model4")
While running inference, I get the error: `'GemmaRMSNorm' object has no attribute 'variance_epsilon'`
I have fine-tuned (FT) gemma-2b-it on a Q&A dataset and saved the LoRA adapters.
One thing I want to know is:
1. Does this snippet from the Gemma fine-tuning (FT) example load the adapters and merge them with the base model behind the scenes?
2. I have loaded gemma-2b-it in float16; here are the configs:
I then merge the adapters into the base model using:
While running inference, I get the error: `'GemmaRMSNorm' object has no attribute 'variance_epsilon'`