Support Deepseek-V2 (#4650)

Co-authored-by: Philipp Moritz <pcmoritz@gmail.com>
This commit is contained in:
wangding zeng
2024-06-29 04:24:57 +08:00
committed by GitHub
parent 2cd402e169
commit be0b3af9e0
6 changed files with 700 additions and 1 deletion

View File

@@ -297,6 +297,12 @@ class ModelConfig:
return self.hf_text_config.hidden_size
def get_head_size(self) -> int:
# TODO remove hard code
if hasattr(self.hf_text_config, "model_type"
) and self.hf_text_config.model_type == 'deepseek_v2':
# FlashAttention supports only head_size 32, 64, 128, 256,
# we need to pad head_size 192 to 256
return 256
if hasattr(self.hf_text_config, "head_dim"):
return self.hf_text_config.head_dim
# FIXME(woosuk): This may not be true for all models.