[Core] Whisper support torch.compile (#30385)

Signed-off-by: NickLucche <nlucches@redhat.com>
This commit is contained in:
Nicolò Lucchesi
2026-01-19 11:02:31 +01:00
committed by GitHub
parent c0a350ca73
commit 74c583bc50
5 changed files with 27 additions and 1 deletions

View File

@@ -25,6 +25,7 @@ from vllm.config import (
set_current_vllm_config,
)
from vllm.config.compilation import DynamicShapesType
from vllm.forward_context import get_forward_context, is_forward_context_available
from vllm.logger import init_logger
from vllm.sequence import IntermediateTensors
from vllm.utils.import_utils import resolve_obj_by_qualname
@@ -388,6 +389,12 @@ def _support_torch_compile(
if self.do_not_compile or torch.compiler.is_compiling():
return self.forward(*args, **kwargs)
# If skip_compiled is set, bypass the compiled model call and invoke
# self.forward directly. This is used e.g. for enc-dec models where tensor
# shapes/types vary across invocations, preventing the capture of a single
# computational graph.
if is_forward_context_available() and get_forward_context().skip_compiled:
return self.forward(*args, **kwargs)
# if aot_compiled_fn is set, call it with partition wrapper context.
# The partition wrapper must be active at runtime for CUDA graph
# capture to work correctly with inductor graph partitioning.