# Supervised fine-tuning run: 60 optimizer steps with the "adamw_8bit" optimizer.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=2048,
    args=SFTConfig(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        warmup_steps=5,
        max_steps=60,
        logging_steps=1,
        output_dir="outputs",
        optim="adamw_8bit",
        seed=3407,
    ),
)
# Start training; the value returned by train() is kept for later inspection.
trainer_stats = trainer.train()
Using the above code, I fine-tuned Qwen3-Coder-30B and got the following error:
File "<string>", line 40, in _unsloth_training_step
File "/home/user/lyz/chenhong/Qwen3-Coder-main/finetuning/unsolth/unsloth_compiled_cache/UnslothSFTTrainer.py", line 1071, in compute_loss
outputs = super().compute_loss(
^^^^^^^^^^^^^^^^^^^^^
File "/home/gpu/1-Program/miniforge3/envs/unsloth_env/lib/python3.11/site-packages/unsloth/models/_utils.py", line 1642, in _unsloth_pre_compute_loss
outputs = self._old_compute_loss(model, inputs, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "<string>", line 36, in compute_loss
File "/home/gpu/1-Program/miniforge3/envs/unsloth_env/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/gpu/1-Program/miniforge3/envs/unsloth_env/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/gpu/1-Program/miniforge3/envs/unsloth_env/lib/python3.11/site-packages/accelerate/utils/operations.py", line 819, in forward
return model_forward(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/gpu/1-Program/miniforge3/envs/unsloth_env/lib/python3.11/site-packages/accelerate/utils/operations.py", line 807, in __call__
return convert_to_fp32(self.model_forward(*args, **kwargs))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/gpu/1-Program/miniforge3/envs/unsloth_env/lib/python3.11/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/gpu/1-Program/miniforge3/envs/unsloth_env/lib/python3.11/site-packages/peft/peft_model.py", line 1923, in forward
return self.base_model(
^^^^^^^^^^^^^^^^
File "/home/gpu/1-Program/miniforge3/envs/unsloth_env/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/gpu/1-Program/miniforge3/envs/unsloth_env/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/gpu/1-Program/miniforge3/envs/unsloth_env/lib/python3.11/site-packages/peft/tuners/tuners_utils.py", line 308, in forward
return self.model.forward(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/lyz/chenhong/Qwen3-Coder-main/finetuning/unsolth/unsloth_compiled_cache/unsloth_compiled_module_qwen3_moe.py", line 726, in forward
return Qwen3MoeForCausalLM_forward(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, labels, use_cache, output_router_logits, cache_position, logits_to_keep, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
^^^^^^^^^^^^^^^
File "/home/gpu/1-Program/miniforge3/envs/unsloth_env/lib/python3.11/site-packages/torch/_dynamo/external_utils.py", line 196, in nonrecursive_disable_wrapper
return fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "/home/gpu/1-Program/miniforge3/envs/unsloth_env/lib/python3.11/site-packages/transformers/utils/generic.py", line 918, in wrapper
output = func(self, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/lyz/chenhong/Qwen3-Coder-main/finetuning/unsolth/unsloth_compiled_cache/unsloth_compiled_module_qwen3_moe.py", line 569, in Qwen3MoeForCausalLM_forward
n_items = (kwargs).get("num_items_in_batch", None) or (kwargs).get("n_items", None)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: Boolean value of Tensor with more than one value is ambiguous
# Supervised fine-tuning run: 60 optimizer steps with the "adamw_8bit" optimizer.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=2048,
    args=SFTConfig(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        warmup_steps=5,
        max_steps=60,
        logging_steps=1,
        output_dir="outputs",
        optim="adamw_8bit",
        seed=3407,
    ),
)
# Start training; the value returned by train() is kept for later inspection.
trainer_stats = trainer.train()
Using the above code, I fine-tuned Qwen3-Coder-30B and got the following error:
File "<string>", line 40, in _unsloth_training_step
File "/home/user/lyz/chenhong/Qwen3-Coder-main/finetuning/unsolth/unsloth_compiled_cache/UnslothSFTTrainer.py", line 1071, in compute_loss
outputs = super().compute_loss(
^^^^^^^^^^^^^^^^^^^^^
File "/home/gpu/1-Program/miniforge3/envs/unsloth_env/lib/python3.11/site-packages/unsloth/models/_utils.py", line 1642, in _unsloth_pre_compute_loss
outputs = self._old_compute_loss(model, inputs, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "<string>", line 36, in compute_loss
File "/home/gpu/1-Program/miniforge3/envs/unsloth_env/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/gpu/1-Program/miniforge3/envs/unsloth_env/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/gpu/1-Program/miniforge3/envs/unsloth_env/lib/python3.11/site-packages/accelerate/utils/operations.py", line 819, in forward
return model_forward(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/gpu/1-Program/miniforge3/envs/unsloth_env/lib/python3.11/site-packages/accelerate/utils/operations.py", line 807, in __call__
return convert_to_fp32(self.model_forward(*args, **kwargs))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/gpu/1-Program/miniforge3/envs/unsloth_env/lib/python3.11/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/gpu/1-Program/miniforge3/envs/unsloth_env/lib/python3.11/site-packages/peft/peft_model.py", line 1923, in forward
return self.base_model(
^^^^^^^^^^^^^^^^
File "/home/gpu/1-Program/miniforge3/envs/unsloth_env/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/gpu/1-Program/miniforge3/envs/unsloth_env/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/gpu/1-Program/miniforge3/envs/unsloth_env/lib/python3.11/site-packages/peft/tuners/tuners_utils.py", line 308, in forward
return self.model.forward(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/lyz/chenhong/Qwen3-Coder-main/finetuning/unsolth/unsloth_compiled_cache/unsloth_compiled_module_qwen3_moe.py", line 726, in forward
return Qwen3MoeForCausalLM_forward(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, labels, use_cache, output_router_logits, cache_position, logits_to_keep, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
^^^^^^^^^^^^^^^
File "/home/gpu/1-Program/miniforge3/envs/unsloth_env/lib/python3.11/site-packages/torch/_dynamo/external_utils.py", line 196, in nonrecursive_disable_wrapper
return fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "/home/gpu/1-Program/miniforge3/envs/unsloth_env/lib/python3.11/site-packages/transformers/utils/generic.py", line 918, in wrapper
output = func(self, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/lyz/chenhong/Qwen3-Coder-main/finetuning/unsolth/unsloth_compiled_cache/unsloth_compiled_module_qwen3_moe.py", line 569, in Qwen3MoeForCausalLM_forward
n_items = (kwargs).get("num_items_in_batch", None) or (kwargs).get("n_items", None)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: Boolean value of Tensor with more than one value is ambiguous