[Core] Use key-only cache for BaseMultiModalProcessor (#23018)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
@@ -11,6 +11,7 @@ from vllm.config import ModelConfig
 from vllm.logger import init_logger
 from vllm.lora.request import LoRARequest
 from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalRegistry
+from vllm.multimodal.cache import BaseMultiModalProcessorCache
 from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalEncDecInputs,
                                     MultiModalInputs)
 from vllm.transformers_utils.tokenizer import AnyTokenizer
@@ -32,12 +33,14 @@ class InputPreprocessor:
         model_config: ModelConfig,
         tokenizer: Optional[TokenizerGroup],
         mm_registry: MultiModalRegistry = MULTIMODAL_REGISTRY,
+        mm_processor_cache: Optional[BaseMultiModalProcessorCache] = None,
     ) -> None:
         super().__init__()
 
         self.model_config = model_config
         self.tokenizer = tokenizer
         self.mm_registry = mm_registry
+        self.mm_processor_cache = mm_processor_cache
 
     def get_tokenizer_group(self) -> TokenizerGroup:
         if self.tokenizer is None:
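The hunk above threads an optional cache into the preprocessor's constructor. A minimal wiring sketch, not from this commit: `tokenizer_group` and `my_cache` are assumed placeholders for an existing TokenizerGroup and some concrete BaseMultiModalProcessorCache implementation, and the model name is hypothetical.

# Hedged sketch of constructing the preprocessor with a processor cache.
from vllm.config import ModelConfig
from vllm.inputs.preprocess import InputPreprocessor

model_config = ModelConfig(model="llava-hf/llava-1.5-7b-hf")  # hypothetical model
preprocessor = InputPreprocessor(
    model_config,
    tokenizer=tokenizer_group,    # placeholder: an existing TokenizerGroup
    mm_processor_cache=my_cache,  # any BaseMultiModalProcessorCache; None disables caching
)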
@@ -261,8 +264,11 @@ class InputPreprocessor:
         """
         tokenizer = self._get_mm_tokenizer(lora_request)
 
-        mm_processor = self.mm_registry.create_processor(self.model_config,
-                                                         tokenizer=tokenizer)
+        mm_processor = self.mm_registry.create_processor(
+            self.model_config,
+            tokenizer=tokenizer,
+            cache=self.mm_processor_cache,
+        )
 
         if mm_processor_kwargs is None:
             mm_processor_kwargs = {}
@@ -286,8 +292,12 @@ class InputPreprocessor:
         """
         tokenizer = await self._get_mm_tokenizer_async(lora_request)
 
-        mm_processor = self.mm_registry.create_processor(self.model_config,
-                                                         tokenizer=tokenizer)
+        mm_processor = self.mm_registry.create_processor(
+            self.model_config,
+            tokenizer=tokenizer,
+            cache=self.mm_processor_cache,
+        )
 
         if mm_processor_kwargs is None:
             mm_processor_kwargs = {}
 
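Both the sync and async paths now pass the same cache instance into create_processor. For intuition about the "key-only" idea in the commit title, a toy sketch follows; the real BaseMultiModalProcessorCache interface is richer, and every name here except clear_cache() (whose call site appears in the final hunk below) is an assumption.

# Toy illustration only, not vLLM's implementation: a key-only cache remembers
# which input keys have been seen instead of storing processed outputs, so
# lookups stay cheap and memory stays flat.
class KeyOnlyCacheSketch:
    def __init__(self) -> None:
        self._seen_keys: set[str] = set()

    def is_cached(self, key: str) -> bool:  # hypothetical helper
        # A hit means the consumer can reuse outputs cached elsewhere.
        return key in self._seen_keys

    def add(self, key: str) -> None:  # hypothetical helper
        self._seen_keys.add(key)

    def clear_cache(self) -> None:
        # Matches the method invoked by InputPreprocessor.clear_cache() below.
        self._seen_keys.clear()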
@@ -860,3 +870,7 @@ class InputPreprocessor:
             tokenization_kwargs=tokenization_kwargs,
             lora_request=lora_request,
         )
+
+    def clear_cache(self) -> None:
+        if self.mm_processor_cache is not None:
+            self.mm_processor_cache.clear_cache()
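A one-line usage sketch for the new method, assuming the `preprocessor` instance from the earlier example:

preprocessor.clear_cache()  # safe no-op when mm_processor_cache is None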