[Model] Deepseek GGUF support (#13167)

2025-02-27 11:08:35 +01:00
parent edf309ebbe
commit 7f0be2aa24
8 changed files with 198 additions and 10 deletions
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -229,6 +229,7 @@ class ModelConfig:
        trust_remote_code: bool,
        dtype: Union[str, torch.dtype],
        seed: int,
+        hf_config_path: Optional[str] = None,
        allowed_local_media_path: str = "",
        revision: Optional[str] = None,
        code_revision: Optional[str] = None,
@@ -259,6 +260,7 @@ class ModelConfig:
        model_impl: Union[str, ModelImpl] = ModelImpl.AUTO,
    ) -> None:
        self.model = model
+        self.hf_config_path = hf_config_path
        self.tokenizer = tokenizer
        self.tokenizer_mode = tokenizer_mode
        self.trust_remote_code = trust_remote_code
@@ -321,8 +323,9 @@ class ModelConfig:
        if self.enable_sleep_mode and not current_platform.is_cuda():
            raise ValueError("Sleep mode is only supported on CUDA devices.")

-        hf_config = get_config(self.model, trust_remote_code, revision,
-                               code_revision, config_format)
+        hf_config = get_config(self.hf_config_path or self.model,
+                               trust_remote_code, revision, code_revision,
+                               config_format)

        if hf_overrides_kw:
            logger.info("Overriding HF config with %s", hf_overrides_kw)
@@ -947,7 +950,7 @@ class ModelConfig:
    def try_get_generation_config(self) -> Dict[str, Any]:
        if self.generation_config is None or self.generation_config == "auto":
            config = try_get_generation_config(
-                self.model,
+                self.hf_config_path or self.model,
                trust_remote_code=self.trust_remote_code,
                revision=self.revision,
            )