[Core] Whisper Enable Encoder Batching (#29421)

Signed-off-by: NickLucche <nlucches@redhat.com>
This commit is contained in:
Nicolò Lucchesi
2025-12-11 22:06:51 +01:00
committed by GitHub
parent 90d6cf921f
commit 0efd9f867c
5 changed files with 87 additions and 25 deletions

View File

@@ -27,6 +27,7 @@ from vllm.logger import init_logger
from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalRegistry
from vllm.v1.core.encoder_cache_manager import (
EncoderCacheManager,
EncoderDecoderCacheManager,
compute_encoder_budget,
)
from vllm.v1.core.kv_cache_manager import KVCacheBlocks, KVCacheManager
@@ -181,7 +182,11 @@ class Scheduler(SchedulerInterface):
# NOTE: For models without an encoder (e.g., text-only models),
# the encoder cache will not be initialized because the cache size is 0
# for these models.
self.encoder_cache_manager = EncoderCacheManager(cache_size=encoder_cache_size)
self.encoder_cache_manager = (
EncoderDecoderCacheManager(cache_size=encoder_cache_size)
if self.is_encoder_decoder
else EncoderCacheManager(cache_size=encoder_cache_size)
)
speculative_config = vllm_config.speculative_config
self.use_eagle = False