diff --git a/vllm/model_executor/layers/attention/mla_attention.py b/vllm/model_executor/layers/attention/mla_attention.py
index 926e8892e..b0e16fa52 100644
--- a/vllm/model_executor/layers/attention/mla_attention.py
+++ b/vllm/model_executor/layers/attention/mla_attention.py
@@ -905,6 +905,10 @@ def unified_mla_kv_cache_update(
     the data dependency between them to ensure torch.compile preserves
     ordering.
     """
     forward_context = get_forward_context()
+    if forward_context.attn_metadata is None:
+        # Dummy/profile forwards should not update live KV cache pages.
+        return torch.empty(0, device=kv_c_normed.device, dtype=kv_c_normed.dtype)
+
     attn_layer = forward_context.no_compile_layers[layer_name]
     kv_cache = attn_layer.kv_cache[forward_context.virtual_engine]