[TPU][V1] Fix exponential padding when max-num-batched-tokens is not a power of 2 (#16596)

Signed-off-by: NickLucche <nlucches@redhat.com>
Nicolò Lucchesi
2025-04-14 19:01:05 +02:00
committed by GitHub
parent aa29841ede
commit b3f2fddd17
2 changed files with 15 additions and 1 deletion


@@ -1040,9 +1040,11 @@ def _get_token_paddings(min_token_size: int, max_token_size: int,
     if padding_gap == 0:
         logger.info("Using exponential token paddings:")
-        while num <= max_token_size:
+        while True:
             logger.info("    %d", num)
             paddings.append(num)
+            if num >= max_token_size:
+                break
             num *= 2
     else:
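
For context, here is a minimal standalone sketch of the exponential branch before and after this change (logger calls and the non-zero `padding_gap` branch are omitted; only the loop shape mirrors the diff, the rest is illustrative). With the old `while num <= max_token_size:` condition, a non-power-of-2 cap such as 100 with a minimum of 16 produced paddings [16, 32, 64] and never reached the cap; the `while True` / `break` form always appends one final padding >= max_token_size.

```python
def _get_token_paddings(min_token_size: int, max_token_size: int,
                        padding_gap: int) -> list[int]:
    # Sketch of the exponential branch only (padding_gap == 0).
    assert min_token_size > 0 and (min_token_size & (min_token_size - 1)) == 0
    paddings: list[int] = []
    num = min_token_size
    while True:
        paddings.append(num)
        # Break only once the current padding covers max_token_size,
        # so a non-power-of-2 cap is still covered by the last entry.
        if num >= max_token_size:
            break
        num *= 2
    return paddings


# Example: max-num-batched-tokens = 100 (not a power of 2).
# Old loop condition (num <= max_token_size) yielded [16, 32, 64];
# the fixed loop yields [16, 32, 64, 128], which covers 100.
print(_get_token_paddings(16, 100, 0))  # [16, 32, 64, 128]
```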