[V1] TPU - Fix the chunked prompt bug (#15713)
Signed-off-by: Alexander Matveev <amatveev@redhat.com>
This commit is contained in:
committed by
GitHub
parent
04437e313d
commit
c3f687ac22
@@ -48,7 +48,10 @@ def test_models(
     with vllm_runner(
             model,
-            max_model_len=8192,
+            # Note: max_num_batched_tokens == 1024 is needed here to
+            # actually test chunked prompt
+            max_num_batched_tokens=1024,
+            max_model_len=8196,
             gpu_memory_utilization=0.7,
             max_num_seqs=16,
             tensor_parallel_size=tensor_parallel_size) as vllm_model:
Reference in New Issue
Block a user