[Bugfix] Set enforce_eager automatically for mllama (#12127)
Signed-off-by: Chen Zhang <zhangch99@outlook.com>
This commit is contained in:
@@ -325,7 +325,6 @@ def run_mllama(question: str, modality: str):
|
|||||||
model=model_name,
|
model=model_name,
|
||||||
max_model_len=4096,
|
max_model_len=4096,
|
||||||
max_num_seqs=16,
|
max_num_seqs=16,
|
||||||
enforce_eager=True,
|
|
||||||
disable_mm_preprocessor_cache=args.disable_mm_preprocessor_cache,
|
disable_mm_preprocessor_cache=args.disable_mm_preprocessor_cache,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -186,7 +186,6 @@ def load_mllama(question, image_urls: List[str]) -> ModelRequestData:
|
|||||||
model=model_name,
|
model=model_name,
|
||||||
max_model_len=4096,
|
max_model_len=4096,
|
||||||
max_num_seqs=16,
|
max_num_seqs=16,
|
||||||
enforce_eager=True,
|
|
||||||
limit_mm_per_prompt={"image": len(image_urls)},
|
limit_mm_per_prompt={"image": len(image_urls)},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -607,10 +607,12 @@ class ModelConfig:
|
|||||||
self.max_seq_len_to_capture = min(self.max_seq_len_to_capture,
|
self.max_seq_len_to_capture = min(self.max_seq_len_to_capture,
|
||||||
self.max_model_len)
|
self.max_model_len)
|
||||||
|
|
||||||
if (self.hf_config.model_type == 'deepseek_v3'
|
MODEL_NOT_SUPPORT_CUDA_GRAPH = ['deepseek_v3', 'mllama']
|
||||||
|
if (self.hf_config.model_type in MODEL_NOT_SUPPORT_CUDA_GRAPH
|
||||||
and not self.enforce_eager):
|
and not self.enforce_eager):
|
||||||
logger.warning("CUDA graph is not supported for Deepseek V3 yet, "
|
logger.warning(
|
||||||
"fallback to the eager mode.")
|
"CUDA graph is not supported for %s yet, fallback to the eager "
|
||||||
|
"mode.", self.hf_config.model_type)
|
||||||
self.enforce_eager = True
|
self.enforce_eager = True
|
||||||
|
|
||||||
def _verify_bnb_config(self) -> None:
|
def _verify_bnb_config(self) -> None:
|
||||||
|
|||||||
Reference in New Issue
Block a user