[Core] Whisper Enable Encoder Batching (#29421)
Signed-off-by: NickLucche <nlucches@redhat.com>
This commit is contained in:
@@ -27,6 +27,7 @@ from vllm.logger import init_logger
 from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalRegistry
 from vllm.v1.core.encoder_cache_manager import (
     EncoderCacheManager,
+    EncoderDecoderCacheManager,
     compute_encoder_budget,
 )
 from vllm.v1.core.kv_cache_manager import KVCacheBlocks, KVCacheManager
@@ -181,7 +182,11 @@ class Scheduler(SchedulerInterface):
         # NOTE: For the models without encoder (e.g., text-only models),
         # the encoder cache will not be initialized because cache size is 0
         # for these models.
-        self.encoder_cache_manager = EncoderCacheManager(cache_size=encoder_cache_size)
+        self.encoder_cache_manager = (
+            EncoderDecoderCacheManager(cache_size=encoder_cache_size)
+            if self.is_encoder_decoder
+            else EncoderCacheManager(cache_size=encoder_cache_size)
+        )

         speculative_config = vllm_config.speculative_config
         self.use_eagle = False
Reference in New Issue
Block a user