[Attention] Get rid of mla cache alignment (#14842)

Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
This commit is contained in:
Lucas Wilkinson
2025-03-15 01:08:25 -04:00
committed by GitHub
parent a2ae496589
commit 5952d8ab61
4 changed files with 14 additions and 83 deletions

View File

@@ -827,12 +827,6 @@ def get_dtype_size(dtype: torch.dtype) -> int:
return torch.tensor([], dtype=dtype).element_size()
def align_to_256bytes(extent: int, dtype: torch.dtype) -> int:
    """Round ``extent`` (an element count) up so the span is 256-byte aligned.

    Args:
        extent: Number of elements of ``dtype``.
        dtype: Torch dtype whose per-element size determines the alignment.

    Returns:
        The smallest multiple of "elements per 256 bytes" that is >= ``extent``.
    """
    # How many elements of this dtype fit in 256 bytes; rounding the extent
    # up to a multiple of this count makes the byte span 256-byte aligned.
    alignment = 256 // get_dtype_size(dtype)
    return round_up(extent, alignment)
# `collections` helpers
def is_list_of(
value: object,