[V1] Remove input cache client (#14864)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Signed-off-by: Roger Wang <ywang@roblox.com>
Co-authored-by: Roger Wang <ywang@roblox.com>
This commit is contained in:
Cyrus Leung
2025-03-17 14:42:06 +08:00
committed by GitHub
parent 8d6cf89526
commit b539222d4e
5 changed files with 48 additions and 201 deletions

View File

@@ -379,6 +379,7 @@ class InputPreprocessor:
multi_modal_data,
mm_processor_kwargs,
lora_request=lora_request,
return_mm_hashes=return_mm_hashes,
)
prompt_token_ids = self._tokenize_prompt(
@@ -401,6 +402,7 @@ class InputPreprocessor:
prompt: SingletonPrompt,
request_id: str,
lora_request: Optional[LoRARequest] = None,
return_mm_hashes: bool = False,
) -> SingletonInputs:
"""Async version of :meth:`_extract_prompt_components`."""
parsed = parse_singleton_prompt(prompt)
@@ -431,6 +433,7 @@ class InputPreprocessor:
multi_modal_data,
mm_processor_kwargs,
lora_request=lora_request,
return_mm_hashes=return_mm_hashes,
)
return token_inputs(
@@ -452,6 +455,7 @@ class InputPreprocessor:
multi_modal_data,
mm_processor_kwargs,
lora_request=lora_request,
return_mm_hashes=return_mm_hashes,
)
prompt_token_ids = await self._tokenize_prompt_async(
@@ -726,6 +730,7 @@ class InputPreprocessor:
prompt,
request_id=request_id,
lora_request=lora_request,
return_mm_hashes=return_mm_hashes,
)
return self._build_decoder_only_llm_inputs(
@@ -746,6 +751,7 @@ class InputPreprocessor:
prompt,
request_id=request_id,
lora_request=lora_request,
return_mm_hashes=return_mm_hashes,
)
return self._build_decoder_only_llm_inputs(