Decouple page_size_bytes calculation in AttentionSpec for TPU/RPA Compatibility. (#31635)
Signed-off-by: Lihao Ran <imlihao.ran@gmail.com>
Signed-off-by: Lumosis <30372757+Lumosis@users.noreply.github.com>
@@ -11,7 +11,9 @@ pytestmark = pytest.mark.cpu_test
 
 
 def new_kv_cache_spec():
-    return FullAttentionSpec(16, 1, 1, torch.float32, False)
+    return FullAttentionSpec(
+        block_size=16, num_kv_heads=1, head_size=1, dtype=torch.float32
+    )
 
 
 def test_initialize_kv_cache_for_kv_sharing_different_attn_groups():
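For context, here is a minimal sketch (not the actual vLLM implementation) of what decoupling the page_size_bytes calculation can look like: the per-page byte count is computed by a standalone helper from the spec's fields, so a TPU/RPA (ragged paged attention) backend could substitute its own calculation without changing the spec class itself. The names page_size_bytes, get_dtype_size, and FullAttentionSpecSketch below are illustrative assumptions, not identifiers taken from this PR.

import torch
from dataclasses import dataclass


def get_dtype_size(dtype: torch.dtype) -> int:
    # Size in bytes of one element of the given dtype.
    return torch.tensor([], dtype=dtype).element_size()


def page_size_bytes(
    block_size: int, num_kv_heads: int, head_size: int, dtype: torch.dtype
) -> int:
    # One page holds `block_size` tokens of both K and V for every KV head,
    # hence the factor of 2. A TPU/RPA backend could replace this helper
    # with a padded variant without touching the spec class.
    return 2 * block_size * num_kv_heads * head_size * get_dtype_size(dtype)


@dataclass
class FullAttentionSpecSketch:
    # Simplified stand-in for FullAttentionSpec, used only in this sketch.
    block_size: int
    num_kv_heads: int
    head_size: int
    dtype: torch.dtype

    @property
    def page_size_bytes(self) -> int:
        # Delegate to the standalone helper instead of computing inline.
        return page_size_bytes(
            self.block_size, self.num_kv_heads, self.head_size, self.dtype
        )


if __name__ == "__main__":
    # Mirrors the spec constructed in the updated test above.
    spec = FullAttentionSpecSketch(
        block_size=16, num_kv_heads=1, head_size=1, dtype=torch.float32
    )
    # 2 (K and V) * 16 * 1 * 1 * 4 bytes = 128 bytes per page.
    print(spec.page_size_bytes)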