[Bugfix][Quantization] Support BF16 tensors on GGUF (#29948)

Signed-off-by: Tsukasa OI <floss_llm@irq.a4lg.com>
2025-12-03 19:33:46 +09:00
parent cc4e296ea6
commit 42c1949643
2 changed files with 18 additions and 1 deletions
--- a/vllm/model_executor/model_loader/weight_utils.py
+++ b/vllm/model_executor/model_loader/weight_utils.py
@@ -921,7 +921,17 @@ def gguf_quant_weights_iterator(
            name = gguf_to_hf_name_map[tensor.name]
            if weight_type.name not in ("F32", "BF16", "F16"):
                name = name.replace("weight", "qweight")
-            param = torch.tensor(weight)
+            if weight_type.name == "BF16" and tensor.data.dtype == np.uint8:
+                # BF16 is currently the only "quantization" type that isn't
+                # actually quantized but is read as a raw byte tensor.
+                # Reinterpret as `torch.bfloat16` tensor.
+                weight = weight.view(np.uint16)
+                if reader.byte_order == "S":
+                    # GGUF endianness != system endianness
+                    weight = weight.byteswap()
+                param = torch.tensor(weight).view(torch.bfloat16)
+            else:
+                param = torch.tensor(weight)
            yield name, param