[TPU][V1] Fix exponential padding when max-num-batched-tokens is not a power of 2 (#16596)
Signed-off-by: NickLucche <nlucches@redhat.com>
@@ -1040,9 +1040,11 @@ def _get_token_paddings(min_token_size: int, max_token_size: int,
     if padding_gap == 0:
         logger.info("Using exponential token paddings:")
-        while num <= max_token_size:
+        while True:
             logger.info("  %d", num)
             paddings.append(num)
+            if num >= max_token_size:
+                break
             num *= 2
     else:
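For context on the bug: with the old condition the loop exits as soon as num exceeds max_token_size, so when max-num-batched-tokens is not a power of 2 no padding bucket ever covers it. Below is a minimal standalone sketch of both loop variants; the two helper functions and the example sizes are illustrative, not part of vLLM, and only the loop bodies mirror the diff.

def old_paddings(min_token_size: int, max_token_size: int) -> list[int]:
    # Old loop: stops once num exceeds max_token_size, so a
    # non-power-of-2 maximum is never covered by any bucket.
    paddings, num = [], min_token_size
    while num <= max_token_size:
        paddings.append(num)
        num *= 2
    return paddings

def new_paddings(min_token_size: int, max_token_size: int) -> list[int]:
    # Fixed loop: always appends the current bucket first, breaking
    # only after a bucket >= max_token_size has been emitted.
    paddings, num = [], min_token_size
    while True:
        paddings.append(num)
        if num >= max_token_size:
            break
        num *= 2
    return paddings

print(old_paddings(16, 96))  # [16, 32, 64]       -> nothing covers 96
print(new_paddings(16, 96))  # [16, 32, 64, 128]  -> 128 covers 96

The fixed variant guarantees the last padding is at least max_token_size, so a batch of max-num-batched-tokens always has a bucket to land in.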