[Model] Deepseek GGUF support (#13167)

2025-02-27 11:08:35 +01:00
parent edf309ebbe
commit 7f0be2aa24
8 changed files with 198 additions and 10 deletions
--- a/vllm/model_executor/layers/linear.py
+++ b/vllm/model_executor/layers/linear.py
@@ -235,10 +235,23 @@ class ReplicatedLinear(LinearBase):
    def weight_loader(self, param: Parameter, loaded_weight: torch.Tensor):
        # If the weight on disk does not have a shape, give it one
        # (such scales for AutoFp8).
+        # Special case for GGUF
+
+        is_gguf_weight = getattr(param, "is_gguf_weight", False)
+        is_gguf_weight_type = getattr(param, "is_gguf_weight_type", False)
+        if is_gguf_weight_type:
+            param.weight_type = loaded_weight.item()
+
+        # Materialize GGUF UninitializedParameter
+        if is_gguf_weight and isinstance(param, UninitializedParameter):
+            param.materialize(loaded_weight.shape, dtype=loaded_weight.dtype)
+
        if len(loaded_weight.shape) == 0:
            loaded_weight = loaded_weight.reshape(1)

-        assert param.size() == loaded_weight.size()
+        assert param.size() == loaded_weight.size(), (
+            f"Tried to load weights of size {loaded_weight.size()}"
+            f"to a parameter of size {param.size()}")
        param.data.copy_(loaded_weight)

    def forward(self,