Support Deepseek-V2 (#4650)
Co-authored-by: Philipp Moritz <pcmoritz@gmail.com>
@@ -297,6 +297,12 @@ class ModelConfig:
         return self.hf_text_config.hidden_size
 
     def get_head_size(self) -> int:
+        # TODO remove hard code
+        if hasattr(self.hf_text_config, "model_type"
+                   ) and self.hf_text_config.model_type == 'deepseek_v2':
+            # FlashAttention supports only head_size 32, 64, 128, 256,
+            # we need to pad head_size 192 to 256
+            return 256
         if hasattr(self.hf_text_config, "head_dim"):
             return self.hf_text_config.head_dim
         # FIXME(woosuk): This may not be true for all models.
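To make the comment in the hunk concrete, below is a minimal standalone sketch (not part of this PR) of the idea it describes: rounding a head size up to the smallest size FlashAttention supports. The names SUPPORTED_HEAD_SIZES and pad_head_size are illustrative only, not vLLM APIs.

# Illustrative sketch only; these names are hypothetical, not vLLM code.
SUPPORTED_HEAD_SIZES = (32, 64, 128, 256)

def pad_head_size(head_size: int) -> int:
    """Round head_size up to the smallest supported head size."""
    for supported in SUPPORTED_HEAD_SIZES:
        if head_size <= supported:
            return supported
    raise ValueError(f"head_size {head_size} exceeds the largest supported size")

# Deepseek-V2's head size of 192 falls between 128 and 256, so it is padded
# up to 256, matching the hard-coded `return 256` in the diff above.
assert pad_head_size(192) == 256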