- Did you update? (`pip install --upgrade unsloth unsloth_zoo`): Yes
- Colab or Kaggle or local / cloud: local
- Number of GPUs used (use `nvidia-smi`): 1 (RTX 4090, 24 GB)
- Which notebook? Please link!
- Which Unsloth version, TRL version, transformers version, PyTorch version?
- Which trainer? (`SFTTrainer`, `GRPOTrainer`, etc.): SFTTrainer
model, tokenizer = FastLanguageModel.from_pretrained(
model_name = "unsloth/Qwen3-Coder-30B-A3B-Instruct",
max_seq_length = max_seq_length,
load_in_4bit = True,
quantization_config=quantization_config,
device_map= {"":"cuda:0"},
offload_embedding = True, # Reduces VRAM by 1GB
unsloth_force_compile = True,
)
model = FastLanguageModel.get_peft_model(
model,
r = lora_rank, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
"gate_proj", "up_proj", "down_proj",],
lora_alpha = lora_rank*2,
lora_dropout = 0, # Supports any, but = 0 is optimized
bias = "none", # Supports any, but = "none" is optimized
# [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
random_state = 3407,
use_rslora = False, # We support rank stabilized LoRA
loftq_config = None, # And LoftQ
)
trainer.train(resume_from_checkpoint = False)
File ~/.unsloth2712/lib/python3.12/site-packages/unsloth_zoo/fused_losses/cross_entropy_loss.py:205, in UnslothFusedLoss.forward(ctx, loss_function, hidden_states, lm_head_weight, lm_head_bias, labels, mask, n_items, scaling, shift_labels, target_gb, torch_compile, overwrite, extra_kwargs)
203 n_chunks = extra_kwargs.pop("n_chunks")
204 else:
--> 205 n_chunks = get_chunk_size(bsz, qlen, vocab_size, target_gb = target_gb)
206 if UNSLOTH_ENABLE_LOGGING:
207 logger.info(f"Fused CE Loss [bsz={bsz}][qlen={qlen}][vocab_size={vocab_size}][n_chunks={n_chunks}]")
File ~/.unsloth2712/lib/python3.12/site-packages/unsloth_zoo/fused_losses/cross_entropy_loss.py:133, in get_chunk_size(bsz, qlen, vocab_size, target_gb)
131 def get_chunk_size(bsz, qlen, vocab_size, target_gb = None):
132 """ Gets chunk size that fits the target max memory usage (1GB) """
--> 133 multiplier = _get_chunk_multiplier(vocab_size, target_gb)
134 n_splits = (bsz*qlen) * multiplier
135 # n_splits = max(round(n_splits / 4) * 4, 1) # Output only multiples of 4
File ~/.unsloth2712/lib/python3.12/site-packages/unsloth_zoo/fused_losses/cross_entropy_loss.py:126, in _get_chunk_multiplier(vocab_size, target_gb)
124 target_gb = free_gb
125 pass
--> 126 multiplier = (vocab_size * 4 / 1024 / 1024 / 1024) / (target_gb)
127 multiplier = multiplier / 4 # Output only multiples of 4
128 return multiplier
ZeroDivisionError: float division by zero
pip install --upgrade unsloth unsloth_zoo: Yes
Colab or Kaggle or local / cloud: local
nvidia-smi: 1 (RTX 4090, 24 GB)
SFTTrainer, GRPOTrainer etc: SFTTrainer
model, tokenizer = FastLanguageModel.from_pretrained(
model_name = "unsloth/Qwen3-Coder-30B-A3B-Instruct",
max_seq_length = max_seq_length,
load_in_4bit = True,
quantization_config=quantization_config,
device_map= {"":"cuda:0"},
offload_embedding = True, # Reduces VRAM by 1GB
unsloth_force_compile = True,
)
model = FastLanguageModel.get_peft_model(
model,
r = lora_rank, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
"gate_proj", "up_proj", "down_proj",],
lora_alpha = lora_rank*2,
lora_dropout = 0, # Supports any, but = 0 is optimized
bias = "none", # Supports any, but = "none" is optimized
# [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
random_state = 3407,
use_rslora = False, # We support rank stabilized LoRA
loftq_config = None, # And LoftQ
)
trainer.train(resume_from_checkpoint = False)
File ~/.unsloth2712/lib/python3.12/site-packages/unsloth_zoo/fused_losses/cross_entropy_loss.py:205, in UnslothFusedLoss.forward(ctx, loss_function, hidden_states, lm_head_weight, lm_head_bias, labels, mask, n_items, scaling, shift_labels, target_gb, torch_compile, overwrite, extra_kwargs)
203 n_chunks = extra_kwargs.pop("n_chunks")
204 else:
--> 205 n_chunks = get_chunk_size(bsz, qlen, vocab_size, target_gb = target_gb)
206 if UNSLOTH_ENABLE_LOGGING:
207 logger.info(f"Fused CE Loss [bsz={bsz}][qlen={qlen}][vocab_size={vocab_size}][n_chunks={n_chunks}]")
File ~/.unsloth2712/lib/python3.12/site-packages/unsloth_zoo/fused_losses/cross_entropy_loss.py:133, in get_chunk_size(bsz, qlen, vocab_size, target_gb)
131 def get_chunk_size(bsz, qlen, vocab_size, target_gb = None):
132 """ Gets chunk size that fits the target max memory usage (1GB) """
--> 133 multiplier = _get_chunk_multiplier(vocab_size, target_gb)
134 n_splits = (bsz*qlen) * multiplier
135 # n_splits = max(round(n_splits / 4) * 4, 1) # Output only multiples of 4
File ~/.unsloth2712/lib/python3.12/site-packages/unsloth_zoo/fused_losses/cross_entropy_loss.py:126, in _get_chunk_multiplier(vocab_size, target_gb)
124 target_gb = free_gb
125 pass
--> 126 multiplier = (vocab_size * 4 / 1024 / 1024 / 1024) / (target_gb)
127 multiplier = multiplier / 4 # Output only multiples of 4
128 return multiplier
ZeroDivisionError: float division by zero