[V1][TPU] Remove unnecessary padding for running on TPU. (#14467)

This commit is contained in:
iefgnoix
2025-03-08 18:56:04 -08:00
committed by GitHub
parent b0d541947a
commit 10f7552789
2 changed files with 6 additions and 18 deletions

View File

@@ -12,8 +12,8 @@ from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl,
from vllm.attention.backends.utils import CommonAttentionState
# These are the 2 tunable parameters of the paged attention Pallas kernel.
-NUM_QUERIES_PER_BLOCK = 16
-NUM_KV_PAGES_PER_BLOCK = 256
+NUM_QUERIES_PER_BLOCK = 32
+NUM_KV_PAGES_PER_BLOCK = 128
class PallasAttentionBackend(AttentionBackend):