[V1] TPU - Revert to exponential padding by default (#15565)
Signed-off-by: Alexander Matveev <amatveev@redhat.com>
This commit is contained in:
committed by
GitHub
parent
dd8a29da99
commit
b2e85e26f4
@@ -944,18 +944,35 @@ def _get_paddings(min_token_size: int, max_token_size: int,
|
||||
def _get_paddings(min_token_size: int, max_token_size: int,
                  padding_gap: int) -> list[int]:
    """Generate a list of padding sizes, starting from min_token_size,
    ending with a number that can cover max_token_size.

    If padding_gap == 0:
        increase 2X each time (exponential paddings).
    else:
        first double the size up to padding_gap,
        then increase the padding size linearly by padding_gap.

    Args:
        min_token_size: smallest padding size; first entry of the result.
        max_token_size: the result's last entry is the first padding that
            reaches or exceeds this size.
        padding_gap: 0 selects pure exponential growth; a positive value
            selects exponential growth up to padding_gap, then linear
            steps of padding_gap.

    Returns:
        The list of padding sizes in increasing order.
    """
    paddings = []
    num = min_token_size

    if padding_gap == 0:
        # Exponential: double until we cover max_token_size (inclusive
        # bound so the final padding is >= max_token_size).
        logger.info("Using exponential paddings:")
        while num <= max_token_size:
            # NOTE(review): inner spacing of the log format string may have
            # been collapsed by the diff rendering — confirm against repo.
            logger.info(" %d", num)
            paddings.append(num)
            num *= 2
    else:
        # Incremental: double while still below padding_gap...
        logger.info("Using incremental paddings:")
        while num <= padding_gap:
            logger.info(" %d", num)
            paddings.append(num)
            num *= 2
        # ...then step back to the last doubled value and grow linearly
        # by padding_gap until max_token_size is covered (strict bound:
        # num is incremented before append, so the last entry is
        # >= max_token_size).
        num //= 2
        while num < max_token_size:
            num += padding_gap
            logger.info(" %d", num)
            paddings.append(num)

    return paddings
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user