Support Deepseek-V2 (#4650)
Co-authored-by: Philipp Moritz <pcmoritz@gmail.com>
@@ -297,6 +297,12 @@ class ModelConfig:
         return self.hf_text_config.hidden_size
 
     def get_head_size(self) -> int:
+        # TODO remove hard code
+        if hasattr(self.hf_text_config, "model_type"
+                   ) and self.hf_text_config.model_type == 'deepseek_v2':
+            # FlashAttention supports only head_size 32, 64, 128, 256,
+            # we need to pad head_size 192 to 256
+            return 256
         if hasattr(self.hf_text_config, "head_dim"):
             return self.hf_text_config.head_dim
         # FIXME(woosuk): This may not be true for all models.
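To make the comment in the hunk concrete, below is a minimal standalone sketch (not part of this PR) of the idea it describes: rounding a head size up to the smallest size FlashAttention supports. The names SUPPORTED_HEAD_SIZES and pad_head_size are illustrative only, not vLLM APIs.

# Illustrative sketch only; these names are hypothetical, not vLLM code.
SUPPORTED_HEAD_SIZES = (32, 64, 128, 256)

def pad_head_size(head_size: int) -> int:
    """Round head_size up to the smallest supported head size."""
    for supported in SUPPORTED_HEAD_SIZES:
        if head_size <= supported:
            return supported
    raise ValueError(f"head_size {head_size} exceeds the largest supported size")

# Deepseek-V2's head size of 192 falls between 128 and 256, so it is padded
# up to 256, matching the hard-coded `return 256` in the diff above.
assert pad_head_size(192) == 256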