Deepseek v3 (#11502)
Some checks failed
Create Release / Create Release (push) Has been cancelled
Some checks failed
Create Release / Create Release (push) Has been cancelled
Signed-off-by: mgoin <michael@neuralmagic.com>
Co-authored-by: mgoin <michael@neuralmagic.com>
Co-authored-by: robertgshaw2-neuralmagic <rshaw@neuralmagic.com>
This commit is contained in:
@@ -596,6 +596,12 @@ class ModelConfig:
|
||||
self.max_seq_len_to_capture = min(self.max_seq_len_to_capture,
|
||||
self.max_model_len)
|
||||
|
||||
if (self.hf_config.model_type == 'deepseek_v3'
|
||||
and not self.enforce_eager):
|
||||
logger.warning("CUDA graph is not supported for Deepseek V3 yet, "
|
||||
"fallback to the eager mode.")
|
||||
self.enforce_eager = True
|
||||
|
||||
def _verify_bnb_config(self) -> None:
|
||||
"""
|
||||
The current version of bitsandbytes (0.44.0) with 8-bit models does not
|
||||
@@ -712,8 +718,9 @@ class ModelConfig:
|
||||
|
||||
def get_head_size(self) -> int:
|
||||
# TODO remove hard code
|
||||
if hasattr(self.hf_text_config, "model_type"
|
||||
) and self.hf_text_config.model_type == 'deepseek_v2':
|
||||
if hasattr(self.hf_text_config,
|
||||
"model_type") and (self.hf_text_config.model_type
|
||||
in ('deepseek_v2', 'deepseek_v3')):
|
||||
# FlashAttention supports only head_size 32, 64, 128, 256,
|
||||
# we need to pad head_size 192 to 256
|
||||
return 256
|
||||
|
||||
Reference in New Issue
Block a user