[V1][TPU] Remove unnecessary padding for running on TPU. (#14467)
This commit is contained in:
@@ -12,8 +12,8 @@ from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl,
|
||||
from vllm.attention.backends.utils import CommonAttentionState
|
||||
|
||||
# These are the 2 tunable parameters of the paged attention Pallas kernel.
|
||||
-NUM_QUERIES_PER_BLOCK = 16
|
||||
-NUM_KV_PAGES_PER_BLOCK = 256
|
||||
+NUM_QUERIES_PER_BLOCK = 32
|
||||
+NUM_KV_PAGES_PER_BLOCK = 128
|
||||
|
||||
|
||||
class PallasAttentionBackend(AttentionBackend):
|
||||
|
||||
Reference in New Issue
Block a user