Enable CUDA graph support for llama 3.2 vision (#14917)

Signed-off-by: Matt Ritter <100659061+mritterfigma@users.noreply.github.com>
Matt Ritter
2025-03-19 23:29:16 -07:00
committed by GitHub
parent 2f726b241e
commit a8652f4f0f
3 changed files with 1 addition and 13 deletions


@@ -670,14 +670,6 @@ class ModelConfig:
         self.max_seq_len_to_capture = min(self.max_seq_len_to_capture,
                                           self.max_model_len)
 
-        MODEL_NOT_SUPPORT_CUDA_GRAPH = ['mllama']
-        if (self.hf_config.model_type in MODEL_NOT_SUPPORT_CUDA_GRAPH
-                and not self.enforce_eager):
-            logger.warning(
-                "CUDA graph is not supported for %s yet, fallback to the eager "
-                "mode.", self.hf_config.model_type)
-            self.enforce_eager = True
-
     def _verify_bnb_config(self) -> None:
         """
         The current version of bitsandbytes (0.44.0) with 8-bit models does not
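With this check removed, `mllama` models (Llama 3.2 Vision) are no longer silently forced into eager mode; CUDA graph capture now follows the usual `enforce_eager` flag like any other architecture. A minimal sketch of the user-facing effect, assuming a standard vLLM install; the checkpoint name, prompt, and capture length below are illustrative, and multimodal image inputs are elided:

```python
from vllm import LLM, SamplingParams

# Before this commit, model_type == "mllama" flipped enforce_eager to True
# with a warning. After it, CUDA graphs are captured unless the caller
# explicitly opts out.
llm = LLM(
    model="meta-llama/Llama-3.2-11B-Vision-Instruct",  # illustrative checkpoint
    enforce_eager=False,          # default; CUDA graphs now apply to mllama too
    max_seq_len_to_capture=8192,  # sequences up to this length use captured graphs
)

outputs = llm.generate(["Describe the image."], SamplingParams(max_tokens=32))
print(outputs[0].outputs[0].text)
```

Passing `enforce_eager=True` still disables graph capture entirely, so the old behavior remains available as an explicit opt-out rather than a hard-coded fallback.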