[Model] Deepseek GGUF support (#13167)

This commit is contained in:
Szymon Ożóg
2025-02-27 11:08:35 +01:00
committed by GitHub
parent edf309ebbe
commit 7f0be2aa24
8 changed files with 198 additions and 10 deletions

View File

@@ -229,6 +229,7 @@ class ModelConfig:
trust_remote_code: bool,
dtype: Union[str, torch.dtype],
seed: int,
+hf_config_path: Optional[str] = None,
allowed_local_media_path: str = "",
revision: Optional[str] = None,
code_revision: Optional[str] = None,
@@ -259,6 +260,7 @@ class ModelConfig:
model_impl: Union[str, ModelImpl] = ModelImpl.AUTO,
) -> None:
self.model = model
+self.hf_config_path = hf_config_path
self.tokenizer = tokenizer
self.tokenizer_mode = tokenizer_mode
self.trust_remote_code = trust_remote_code
@@ -321,8 +323,9 @@ class ModelConfig:
if self.enable_sleep_mode and not current_platform.is_cuda():
raise ValueError("Sleep mode is only supported on CUDA devices.")
-hf_config = get_config(self.model, trust_remote_code, revision,
-code_revision, config_format)
+hf_config = get_config(self.hf_config_path or self.model,
+trust_remote_code, revision, code_revision,
+config_format)
if hf_overrides_kw:
logger.info("Overriding HF config with %s", hf_overrides_kw)
@@ -947,7 +950,7 @@ class ModelConfig:
def try_get_generation_config(self) -> Dict[str, Any]:
if self.generation_config is None or self.generation_config == "auto":
config = try_get_generation_config(
-self.model,
+self.hf_config_path or self.model,
trust_remote_code=self.trust_remote_code,
revision=self.revision,
)