[V1] Prefix caching for vision language models (#11187)
Signed-off-by: Cody Yu <hao.yu.cody@gmail.com>
@@ -60,9 +60,13 @@ class AsyncLLM(EngineClient):
         self.client_aborted_requests: List[str] = []
 
         # Processor (converts Inputs --> EngineCoreRequests).
-        self.processor = Processor(vllm_config.model_config,
-                                   vllm_config.lora_config, self.tokenizer,
-                                   input_registry)
+        self.processor = Processor(
+            model_config=vllm_config.model_config,
+            cache_config=vllm_config.cache_config,
+            lora_config=vllm_config.lora_config,
+            tokenizer=self.tokenizer,
+            input_registry=input_registry,
+        )
 
         # Detokenizer (converts EngineCoreOutputs --> RequestOutput).
         self.detokenizer = Detokenizer(
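
Not part of the commit itself, but for context, a minimal usage sketch of what this plumbing enables: with cache_config now passed into the Processor, multimodal inputs can participate in prefix-cache bookkeeping, so a vision language model can run with prefix caching on the V1 engine. The enable_prefix_caching engine argument and the VLLM_USE_V1 opt-in variable are standard vLLM knobs; the model name is just an example of a supported VLM.

# Illustrative sketch only, assuming vLLM's public LLM entry point.
import os

# The V1 engine was opt-in via this environment variable when this landed.
os.environ["VLLM_USE_V1"] = "1"

from vllm import LLM

llm = LLM(
    model="llava-hf/llava-1.5-7b-hf",  # example vision language model
    enable_prefix_caching=True,        # prefix caching now works with VLMs
)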