mla: don't update kv cache on dummy forwards (#36282)

Signed-off-by: Itay Alroy <ialroy@nvidia.com>
This commit is contained in:
Itay Alroy
2026-03-07 02:36:00 +02:00
committed by GitHub
parent b5e34e1fca
commit 24a03915f5

View File

@@ -905,6 +905,10 @@ def unified_mla_kv_cache_update(
the data dependency between them to ensure torch.compile preserves ordering.
"""
forward_context = get_forward_context()
if forward_context.attn_metadata is None:
# Dummy/profile forwards should not update live KV cache pages.
return torch.empty(0, device=kv_c_normed.device, dtype=kv_c_normed.dtype)
attn_layer = forward_context.no_compile_layers[layer_name]
kv_cache = attn_layer.kv_cache[forward_context.virtual_engine]