[Bugfix] Set enforce_eager automatically for mllama (#12127)

Signed-off-by: Chen Zhang <zhangch99@outlook.com>
This commit is contained in:
Chen Zhang
2025-01-17 04:30:08 +08:00
committed by GitHub
parent 62b06ba23d
commit d06e824006
3 changed files with 5 additions and 5 deletions

View File

@@ -607,10 +607,12 @@ class ModelConfig:
         self.max_seq_len_to_capture = min(self.max_seq_len_to_capture,
                                           self.max_model_len)
-        if (self.hf_config.model_type == 'deepseek_v3'
+        MODEL_NOT_SUPPORT_CUDA_GRAPH = ['deepseek_v3', 'mllama']
+        if (self.hf_config.model_type in MODEL_NOT_SUPPORT_CUDA_GRAPH
                 and not self.enforce_eager):
-            logger.warning("CUDA graph is not supported for Deepseek V3 yet, "
-                           "fallback to the eager mode.")
+            logger.warning(
+                "CUDA graph is not supported for %s yet, fallback to the eager "
+                "mode.", self.hf_config.model_type)
             self.enforce_eager = True

     def _verify_bnb_config(self) -> None: