[Bugfix] Fix RequestOutput missing lora_request (#30636)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
2025-12-16 17:36:35 +08:00
parent 0d0c929f23
commit 0e391e7570
3 changed files with 19 additions and 7 deletions
--- a/tests/lora/test_gptoss_tp.py
+++ b/tests/lora/test_gptoss_tp.py
@@ -76,6 +76,8 @@ def test_gpt_oss_lora(gptoss20b_lora_files):
        enable_lora=True,
        max_loras=4,
        max_lora_rank=8,
+        max_num_seqs=2,
+        max_num_batched_tokens=2048,
        compilation_config=vllm.config.CompilationConfig(  # Avoid OOM
            cudagraph_specialize_lora=False,
        ),
@@ -94,8 +96,10 @@ def test_gpt_oss_lora_tp2(gptoss20b_lora_files, fully_sharded_loras):
        enable_lora=True,
        max_loras=2,
        max_lora_rank=8,
-        max_num_seqs=16,
+        max_num_seqs=2,
+        max_num_batched_tokens=2048,
        tensor_parallel_size=2,
+        gpu_memory_utilization=0.8,
        fully_sharded_loras=fully_sharded_loras,
        compilation_config=vllm.config.CompilationConfig(  # Avoid OOM
            cudagraph_specialize_lora=False,