Decouple page_size_bytes calculation in AttentionSpec for TPU/RPA Compatibility. (#31635)
Signed-off-by: Lihao Ran <imlihao.ran@gmail.com>
Signed-off-by: Lumosis <30372757+Lumosis@users.noreply.github.com>
@@ -11,7 +11,9 @@ pytestmark = pytest.mark.cpu_test
 
 
 def new_kv_cache_spec():
-    return FullAttentionSpec(16, 1, 1, torch.float32, False)
+    return FullAttentionSpec(
+        block_size=16, num_kv_heads=1, head_size=1, dtype=torch.float32
+    )
 
 
 def test_initialize_kv_cache_for_kv_sharing_different_attn_groups():
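For context, here is a minimal sketch (not the actual vLLM implementation) of what decoupling the page_size_bytes calculation can look like: the per-page byte count is computed by a standalone helper from the spec's fields, so a TPU/RPA (ragged paged attention) backend could substitute its own calculation without changing the spec class itself. The names page_size_bytes, get_dtype_size, and FullAttentionSpecSketch below are illustrative assumptions, not identifiers taken from this PR.

import torch
from dataclasses import dataclass


def get_dtype_size(dtype: torch.dtype) -> int:
    # Size in bytes of one element of the given dtype.
    return torch.tensor([], dtype=dtype).element_size()


def page_size_bytes(
    block_size: int, num_kv_heads: int, head_size: int, dtype: torch.dtype
) -> int:
    # One page holds `block_size` tokens of both K and V for every KV head,
    # hence the factor of 2. A TPU/RPA backend could replace this helper
    # with a padded variant without touching the spec class.
    return 2 * block_size * num_kv_heads * head_size * get_dtype_size(dtype)


@dataclass
class FullAttentionSpecSketch:
    # Simplified stand-in for FullAttentionSpec, used only in this sketch.
    block_size: int
    num_kv_heads: int
    head_size: int
    dtype: torch.dtype

    @property
    def page_size_bytes(self) -> int:
        # Delegate to the standalone helper instead of computing inline.
        return page_size_bytes(
            self.block_size, self.num_kv_heads, self.head_size, self.dtype
        )


if __name__ == "__main__":
    # Mirrors the spec constructed in the updated test above.
    spec = FullAttentionSpecSketch(
        block_size=16, num_kv_heads=1, head_size=1, dtype=torch.float32
    )
    # 2 (K and V) * 16 * 1 * 1 * 4 bytes = 128 bytes per page.
    print(spec.page_size_bytes)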