convert : add support for Nemotron Nano 3 Omni #22481
Conversation
This commit adds support for the NVIDIA Nemotron Nano 3 Omni model, enabling it to be converted to GGUF.
| def dequant_model(self): | ||
| if self._is_nvfp4: | ||
| # Skip nvfp4 quantization for vision/audio model. | ||
| return | ||
| super().dequant_model() |
There was a problem hiding this comment.
This was to enable the mmproj model conversion for the NVFP4 model. It was a very late change as I did not get access to the NVFP4 model until yesterday, so there may be better ways to do this. Below is the commit in isolation, and also the error if we just remove/comment out the above dequant_model function in the NemotronNanoV2VLModel class.
nvfp4 commit
commit 11404c21dc0b5409e85686c426c9ae7c20944147
Author: Daniel Bevenius <daniel.bevenius@gmail.com>
Date: Tue Apr 28 08:53:45 2026 +0200
convert : avoid nvfp4 processing for mmproj model
This commit enables avoiding nvfp4 processing for mmproj models as the
text language model does not need to be processed for these models and
they also don't contain the mapping of the text model tensors which will
cause errors during conversion.
diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index f5796cb5d..03aa957f0 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -728,6 +728,9 @@ class ModelBase:
del experts, merged
+ def _needs_nvfp4_processing(self) -> bool:
+ return True
+
def prepare_tensors(self):
# detect NVFP4 quantization (ModelOpt format)
quant_algo = (self.hparams.get("quantization_config") or {}).get("quant_algo")
@@ -758,7 +761,7 @@ class ModelBase:
# NVFP4 weights are repacked and written directly to gguf_writer.
# This must run before dequant_model so NVFP4 tensors are removed
# from model_tensors, leaving only non-NVFP4 (e.g. FP8) for dequant.
- if self._is_nvfp4:
+ if self._is_nvfp4 and self._needs_nvfp4_processing():
self._generate_nvfp4_tensors()
self.dequant_model()
@@ -2190,6 +2193,10 @@ class MmprojModel(ModelBase):
# merge configs
self.preprocessor_config = {**self.preprocessor_config, **cfg}
+ def _needs_nvfp4_processing(self) -> bool:
+ # nvfp4 quantization applies to the text model only.
+ return False
+
def get_vision_config(self) -> dict[str, Any] | None:
config_name = "vision_config" if not self.is_mistral_format else "vision_encoder"
return self.global_config.get(config_name)
@@ -4450,6 +4457,12 @@ class NemotronNanoV2VLModel(MmprojModel):
}
return vision_config
+ def dequant_model(self):
+ if self._is_nvfp4:
+ # Skip nvfp4 quantization for vision/audio model.
+ return
+ super().dequant_model()
+
def set_gguf_parameters(self):
if "image_mean" not in self.preprocessor_config:
self.preprocessor_config["image_mean"] = [0.485, 0.456, 0.406]
error
INFO:hf-to-gguf:Exporting model...
Traceback (most recent call last):
File "/home/danbev/work/llama.cpp/examples/model-conversion/../../convert_hf_to_gguf.py", line 13586, in <module>
main()
File "/home/danbev/work/llama.cpp/examples/model-conversion/../../convert_hf_to_gguf.py", line 13580, in main
model_instance.write()
File "/home/danbev/work/llama.cpp/examples/model-conversion/../../convert_hf_to_gguf.py", line 933, in write
self.prepare_tensors()
File "/home/danbev/work/llama.cpp/examples/model-conversion/../../convert_hf_to_gguf.py", line 775, in prepare_tensors
for name, data_torch in chain(self.generate_extra_tensors(), self.get_tensors()):
File "/home/danbev/work/llama.cpp/examples/model-conversion/../../convert_hf_to_gguf.py", line 527, in get_tensors
yield name, gen()
^^^^^
File "/home/danbev/work/llama.cpp/examples/model-conversion/../../convert_hf_to_gguf.py", line 511, in <lambda>
self.model_tensors[weight_name] = lambda w=w, s=s: dequant_simple(w(), s(), None)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/danbev/work/llama.cpp/examples/model-conversion/../../convert_hf_to_gguf.py", line 328, in dequant_simple
return weight.float() * scale
~~~~~~~~~~~~~~~^~~~~~~
File "/home/danbev/work/llama.cpp/examples/model-conversion/../../gguf-py/gguf/lazy.py", line 40, in wrapped_special_op
return type(self)._wrap_fn(
^^^^^^^^^^^^^^^^^^^^
File "/home/danbev/work/llama.cpp/examples/model-conversion/../../gguf-py/gguf/lazy.py", line 126, in wrapped_fn
res = fn(*meta_args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/danbev/work/llama.cpp/venv/lib/python3.12/site-packages/torch/_prims_common/wrappers.py", line 291, in _fn
result = fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "/home/danbev/work/llama.cpp/venv/lib/python3.12/site-packages/torch/_prims_common/wrappers.py", line 143, in _fn
result = fn(**bound.arguments)
^^^^^^^^^^^^^^^^^^^^^
File "/home/danbev/work/llama.cpp/venv/lib/python3.12/site-packages/torch/_refs/__init__.py", line 1095, in _ref
a, b = _maybe_broadcast(a, b)
^^^^^^^^^^^^^^^^^^^^^^
File "/home/danbev/work/llama.cpp/venv/lib/python3.12/site-packages/torch/_refs/__init__.py", line 437, in _maybe_broadcast
common_shape = _broadcast_shapes(
^^^^^^^^^^^^^^^^^^
File "/home/danbev/work/llama.cpp/venv/lib/python3.12/site-packages/torch/_refs/__init__.py", line 425, in _broadcast_shapes
torch._check(
File "/home/danbev/work/llama.cpp/venv/lib/python3.12/site-packages/torch/__init__.py", line 1656, in _check
_check_with(RuntimeError, cond, message)
File "/home/danbev/work/llama.cpp/venv/lib/python3.12/site-packages/torch/__init__.py", line 1638, in _check_with
raise error_type(message_evaluated)
RuntimeError: Attempting to broadcast a dimension of length 116 at -1! Mismatching argument at index 1 had torch.Size([2688, 116]); but expected shape should be broadcastable to [2688, 928]
There was a problem hiding this comment.
This suggests you are now left with weight_scale tensors unaccounted for, are you sure this created a working GGUF?
Edit: Oh, wait, I get it, it's because you're skipping the whole process for mmproj, so the NVFP4 tensors are left as-is.
There was a problem hiding this comment.
There is probably a cleaner way to do this, I'll look into it.
This commit adds support for NVIDIA Nemotron Nano 3 Omni model enabling this model to be converted to GGUF.
This commit adds support for NVIDIA Nemotron Nano 3 Omni model enabling this model to be converted to GGUF.
This commit adds support for NVIDIA Nemotron Nano 3 Omni model enabling this model to be converted to GGUF.
This commit adds support for NVIDIA Nemotron Nano 3 Omni model enabling this model to be converted to GGUF.
This commit adds support for NVIDIA Nemotron Nano 3 Omni model enabling this model to be converted to GGUF.
This commit adds support for NVIDIA Nemotron Nano 3 Omni model enabling this model to be converted to GGUF.
This commit adds support for NVIDIA Nemotron Nano 3 Omni model enabling this model to be converted to GGUF. (cherry picked from commit 5d56eff)
This commit adds support for NVIDIA Nemotron Nano 3 Omni model enabling this model to be converted to GGUF.
This commit adds support for NVIDIA Nemotron Nano 3 Omni model enabling this model to be converted to GGUF.
Overview
This commit adds support for the NVIDIA Nemotron Nano 3 Omni model, enabling it to be converted to GGUF.
Requirements