[feat] Enable mm caching for transformers backend (#21358)

Signed-off-by: raushan <raushan@huggingface.co>
This commit is contained in:
Raushan Turganbay
2025-07-22 17:18:46 +02:00
committed by GitHub
parent b194557a6c
commit f38ee34a0a
4 changed files with 7 additions and 18 deletions

View File

@@ -406,9 +406,9 @@ def need_extra_keys(request: Request) -> bool:
# Multimodal requests need to include the MM hash.
# LoRA requests need to include the LoRA ID.
# Request with provided cache salt need to include the salt.
-    return bool(request.mm_positions) or (request.lora_request
-                                          is not None) or (request.cache_salt
-                                                           is not None)
+    return bool(request.mm_hashes) or (request.lora_request
+                                       is not None) or (request.cache_salt
+                                                        is not None)
def _gen_mm_extra_hash_keys(request: Request, start_token_idx: int,