[feat] Enable mm caching for transformers backend (#21358)

Signed-off-by: raushan <raushan@huggingface.co>
This commit is contained in:
Raushan Turganbay
2025-07-22 17:18:46 +02:00
committed by GitHub
parent b194557a6c
commit f38ee34a0a
4 changed files with 7 additions and 18 deletions

View File

@@ -406,9 +406,9 @@ def need_extra_keys(request: Request) -> bool:
# Multimodal requests need to include the MM hash.
# LoRA requests need to include the LoRA ID.
# Request with provided cache salt need to include the salt.
-    return bool(request.mm_positions) or (request.lora_request
-                                          is not None) or (request.cache_salt
-                                                           is not None)
+    return bool(request.mm_hashes) or (request.lora_request
+                                       is not None) or (request.cache_salt
+                                                        is not None)
def _gen_mm_extra_hash_keys(request: Request, start_token_idx: int,