Skip to content

The model saved by save_pretrained_gguf does not include the LoRA weights #3229

@songjiahao-wq

Description

@songjiahao-wq

I loaded the safetensors model saved by save_pretrained_gguf, but its output does not reflect the fine-tuned LoRA parameters:

import os

# Configure the process environment BEFORE importing torch / unsloth.
# CUDA_VISIBLE_DEVICES is only read when CUDA is initialised, so assigning it
# after an import that has already probed the GPU can silently have no effect.
# The proxy variables are set here as well so they are in place for any
# network access performed later in the script.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"   # use only the 3070 Ti
os.environ["http_proxy"] = "http://192.168.1.222:8080"
os.environ["https_proxy"] = "http://192.168.1.222:8080"

import torch  # noqa: E402  (deliberately imported after the environment setup)
from unsloth import FastLanguageModel  # noqa: E402

MODEL_DIR = "model"   # <- your merged Hugging Face model directory
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Half-precision inference is recommended; switch to torch.float16 if GPU
# memory is tight.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name      = MODEL_DIR,
    max_seq_length  = 2048,
    load_in_4bit    = True,           # merged models are usually saved in full precision / 8-bit; set True only if you really saved in 4-bit
    # torch_dtype     = torch.bfloat16 if torch.cuda.is_available() else torch.float32,
)
# FastLanguageModel.for_inference(model)   # inference speed-up

# Move the model only when it is not already on the target device type.
# NOTE(review): transformers may refuse `.to()` on bitsandbytes-quantized
# models — confirm this branch is reachable/safe with load_in_4bit=True.
if model.device.type != DEVICE:
    model.to(DEVICE)
model.eval()

def chat(user_text: str, max_new_tokens: int = 2048) -> str:
    """Generate one sampled reply to a single user message.

    The message is wrapped in the tokenizer's chat template, passed to the
    module-level ``model`` for sampling, and only the newly generated tokens
    are decoded.

    Args:
        user_text: Content of the single user turn sent to the model.
        max_new_tokens: Upper bound on the number of tokens to generate.

    Returns:
        The decoded completion with special tokens removed and surrounding
        whitespace stripped.
    """
    messages = [
        # An extra priming user turn (e.g. a persona statement) can be added here.
        {"role":"user","content": user_text},
    ]
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True,
        # Note: do not turn on enable_thinking unless the base model is a
        # reasoning model.
        enable_thinking=True,
    )
    # The chat template has already inserted every special token the model
    # expects, so tokenize without adding them again (prevents a duplicated
    # BOS token on tokenizers that prepend one by default).
    inputs = tokenizer(
        prompt, return_tensors="pt", add_special_tokens=False
    ).to(model.device)  # place inputs on the same device as the model weights
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            do_sample=True, temperature=0.6, top_p=0.95, top_k=20,
            max_new_tokens=max_new_tokens,
            renormalize_logits=True,                     # guard against numerical anomalies
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )
    # generate() returns prompt + completion; slice off the prompt tokens.
    prompt_length = inputs["input_ids"].shape[1]
    new_ids = output_ids[0, prompt_length:]
    return tokenizer.decode(new_ids, skip_special_tokens=True).strip()

# Smoke test: send one question to the model and print its reply.
reply = chat("你是一个体能训练大模型,你是谁开发的?")
print(reply)

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions