Skip to content

importing unsloth before trl on multi image grpo training gives mismatch error #5183

@jaaabir

Description

@jaaabir

Note: Please do not remove the questions. Answer beside them.

  1. Did you update? pip install --upgrade unsloth unsloth_zoo: yes
  2. Colab or Kaggle or local / cloud: cloud ( a40 nvidia gpu )
  3. Number GPUs used, use nvidia-smi: 1
  4. Which notebook? Please link! : no
  5. Which Unsloth version, TRL version, transformers version, PyTorch version?: 2026.4.6, 1.2.0, 5.5.4, 2.10.0
  6. Which trainer? SFTTrainer, GRPOTrainer etc: GRPOTrainer
#  Testing script

from datasets import load_dataset
from trl import GRPOConfig, GRPOTrainer
from unsloth import FastVisionModel
from peft import LoraConfig
import torch 
from datasets import Sequence, Image as HFImage
import os 


# Load the "conversational_prompt_only" config (train split) of the
# "trl-internal-testing/zen-multi-image" dataset.
# If not handled properly, prompt truncation may truncate image tokens.
dataset = load_dataset("trl-internal-testing/zen-multi-image", "conversational_prompt_only", split="train")
# Keep only samples that carry exactly two images: per this report, the
# token/feature mismatch only appears for multi-image samples.
dataset = dataset.filter(lambda x: len(x["images"]) == 2) 


def my_reward_function(prompts, completions, **kwargs):
    """Dummy reward used only to drive the reproduction.

    Every prompt receives the same constant reward of 1.0; ``completions``
    and any extra keyword arguments are accepted but ignored.

    Returns:
        A list of ``1.0`` values with one entry per element of ``prompts``.
    """
    num_prompts = len(prompts)
    return [1.0 for _ in range(num_prompts)]

# Load a local fine-tuned checkpoint through Unsloth's FastVisionModel.
# The call returns two objects; the second is later passed to the trainer as
# its `processing_class`.
# Weights are requested in bfloat16 with 4-bit loading disabled, FlashAttention-2
# is requested as the attention implementation, and `fast_inference` is off.
model, processor = FastVisionModel.from_pretrained(
            "./outputs/qwen3.5-2b-invoice-finetuned",
            max_seq_length=55_000,
            dtype=torch.bfloat16,
            load_in_4bit=False,
            trust_remote_code=True,
            device_map="auto",
            attn_implementation="flash_attention_2",
            fast_inference=False
        )

# Wrap the model for PEFT training. No adapter options are passed here, so
# whatever defaults `FastVisionModel.get_peft_model` provides are used.
model = FastVisionModel.get_peft_model(
            model,
        )

# GRPO configuration for the reproduction run.
# Notable settings: vLLM generation is disabled (`use_vllm=False`), each prompt
# gets 4 generations, completions may be up to 20,000 tokens long, and unused
# dataset columns are kept (`remove_unused_columns=False`).
# `max_new_tokens`, `max_prompt_length` and `mask_truncated_completions` are
# deliberately left commented out, exactly as in the reporter's script.
training_args = GRPOConfig(
    output_dir="tmp_dir",   
    # ── generation ─────────────────────────────────────────
            # max_new_tokens=self.max_new_tokens,
            temperature=0.9,       # higher than inference — need diverse rollouts
            top_p=0.95,
            top_k=20,
            num_generations=4,     # G=4 rollouts per prompt
            # ── training ───────────────────────────────────────────
            num_train_epochs=1,
            learning_rate=1e-5,
            weight_decay=0.01,
            max_grad_norm=0.5,
            warmup_steps=20,
            lr_scheduler_type="cosine",
            optim="adamw_8bit",
            # ── batch / accumulation ───────────────────────────────
            per_device_train_batch_size=1,
            gradient_accumulation_steps=4,
            # ── GRPO-specific ──────────────────────────────────────
            beta=0.04,             # KL penalty coefficient
            epsilon=0.2,           # PPO clip ratio
            # ── memory ─────────────────────────────────────────────
            bf16=True,
            fp16=False,
            use_cache=False,
            # ── dataloader ─────────────────────────────────────────
            dataloader_num_workers=2,
            dataloader_pin_memory=True,
            # ── saving / logging ───────────────────────────────────
            save_strategy="epoch",
            save_total_limit=2,
            logging_steps=5,
            report_to="none",
            seed=4123,
            remove_unused_columns=False,
            use_vllm=False,
            log_completions = True,
            # max_prompt_length = 45_000,
            max_completion_length = 20_000,
            importance_sampling_level = "sequence",
            # mask_truncated_completions = False,
            loss_type = "vespo"
        )

# Build the GRPO trainer from the PEFT-wrapped model, the processor returned
# by `from_pretrained`, the constant dummy reward, and the two-image dataset.
trainer = GRPOTrainer(
    model=model,
    processing_class=processor,
    reward_funcs=my_reward_function,  # the constant dummy reward defined earlier in this script
    args=training_args,
    train_dataset=dataset,
)

# Snapshot a clone of every named parameter before training. Despite the
# variable name, there is no `requires_grad` filter, so all parameters are
# cloned. The snapshot is not used again in the visible script.
previous_trainable_params = {n: param.clone() for n, param in trainer.model.named_parameters()}

# Start training. In the traceback included with this report, the
# "Image features and image tokens do not match" ValueError is raised from
# inside the trainer's `train()` call.
trainer.train()

Explanation:

  • I get this mismatch between the image tokens and the image features if I import unsloth before trl, but I do not get the error if I import trl before unsloth.
  • This only happens when a sample has multiple images; there is no error when a sample has a single image.
  • Also, the error suggests that for a sample with two images (len(images) == 2), only one image seems to be used, which is why the model produces image features for only one image (596 image tokens vs. 298 image features).

error:

Traceback (most recent call last):
  File "/workspace/ocr/qwen_grpo2.py", line 350, in <module>
    main()
  File "/workspace/ocr/qwen_grpo2.py", line 346, in main
    trainer.train()
  File "/workspace/ocr/qwen_grpo2.py", line 290, in train
    result =self.trainer.train()
            ^^^^^^^^^^^^^^^^^^^^
  File "/workspace/ocr/unsloth_compiled_cache/UnslothGRPOTrainer.py", line 84, in wrapper
    output = f(self, *args, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspace/stable-venv/lib/python3.12/site-packages/unsloth/models/rl.py", line 144, in _unsloth_train_with_resume_guard
    return original_train(self, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspace/stable-venv/lib/python3.12/site-packages/transformers/trainer.py", line 1425, in train
    return inner_training_loop(
           ^^^^^^^^^^^^^^^^^^^^
  File "/workspace/stable-venv/lib/python3.12/site-packages/transformers/trainer.py", line 1507, in _inner_training_loop
    self._run_epoch(
  File "/workspace/stable-venv/lib/python3.12/site-packages/transformers/trainer.py", line 1735, in _run_epoch
    tr_loss_step = self.training_step(model, inputs, num_items_in_batch)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspace/ocr/unsloth_compiled_cache/UnslothGRPOTrainer.py", line 3086, in training_step
    output = super().training_step(model, inputs, num_items_in_batch)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "<string>", line 34, in _unsloth_training_step
  File "/workspace/stable-venv/lib/python3.12/site-packages/trl/extras/profiling.py", line 202, in wrapper
    return func(self, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspace/ocr/unsloth_compiled_cache/UnslothGRPOTrainer.py", line 3115, in _prepare_inputs
    generation_batch = self._generate_and_score_completions(generation_batch)
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspace/stable-venv/lib/python3.12/site-packages/unsloth/models/rl.py", line 526, in wrapped
    return original(self, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspace/ocr/unsloth_compiled_cache/UnslothGRPOTrainer.py", line 4111, in _generate_and_score_completions
    ref_per_token_logps, _ = self._get_per_token_logps_and_entropies(
                             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspace/ocr/unsloth_compiled_cache/UnslothGRPOTrainer.py", line 3021, in _get_per_token_logps_and_entropies
    logits_chunk = unwrapped_model(
                   ^^^^^^^^^^^^^^^^
  File "/workspace/stable-venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1776, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspace/stable-venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1787, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspace/stable-venv/lib/python3.12/site-packages/peft/peft_model.py", line 1993, in forward
    return self.base_model(
           ^^^^^^^^^^^^^^^^
  File "/workspace/stable-venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1776, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspace/stable-venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1787, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspace/stable-venv/lib/python3.12/site-packages/peft/tuners/tuners_utils.py", line 330, in forward
    return self.model.forward(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspace/stable-venv/lib/python3.12/site-packages/transformers/utils/generic.py", line 876, in wrapper
    output = func(self, *args, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspace/stable-venv/lib/python3.12/site-packages/transformers/models/qwen3_5/modeling_qwen3_5.py", line 1937, in forward
    outputs = self.model(
              ^^^^^^^^^^^
  File "/workspace/stable-venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1776, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspace/stable-venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1787, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspace/stable-venv/lib/python3.12/site-packages/transformers/utils/generic.py", line 876, in wrapper
    output = func(self, *args, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspace/stable-venv/lib/python3.12/site-packages/transformers/models/qwen3_5/modeling_qwen3_5.py", line 1666, in forward
    image_mask, _ = self.get_placeholder_mask(
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspace/stable-venv/lib/python3.12/site-packages/transformers/models/qwen3_5/modeling_qwen3_5.py", line 1569, in get_placeholder_mask
    torch_compilable_check(
  File "/workspace/stable-venv/lib/python3.12/site-packages/transformers/utils/import_utils.py", line 1540, in torch_compilable_check
    torch._check_with(error_type, cond, msg_callable)
  File "/workspace/stable-venv/lib/python3.12/site-packages/torch/__init__.py", line 1719, in _check_with
    raise error_type(message_evaluated)
ValueError: Image features and image tokens do not match, tokens: 596, features: 298

🦥 You can also ask via our Reddit page: https://reddit.com/r/unsloth/

Metadata

Metadata

Assignees

Type

No type
No fields configured for issues without a type.

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions