[LoRA] Optimize 3D MoE logic (#29222)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
Jee Jee Li
2025-11-24 10:27:23 +08:00
committed by GitHub
parent c309bb5245
commit 1073ba68b0
11 changed files with 397 additions and 105 deletions

View File

@@ -1,6 +1,8 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
import vllm
from vllm.lora.request import LoRARequest
@@ -84,14 +86,17 @@ def test_gpt_oss_lora(gptoss20b_lora_files):
@multi_gpu_test(num_gpus=2)
def test_gpt_oss_lora_tp2(gptoss20b_lora_files):
@pytest.mark.parametrize("fully_sharded_loras", [False, True])
def test_gpt_oss_lora_tp2(gptoss20b_lora_files, fully_sharded_loras):
llm = vllm.LLM(
MODEL_PATH,
max_model_len=1024,
enable_lora=True,
max_loras=2,
max_lora_rank=8,
max_num_seqs=16,
tensor_parallel_size=2,
fully_sharded_loras=fully_sharded_loras,
compilation_config=vllm.config.CompilationConfig( # Avoid OOM
cudagraph_specialize_lora=False,
),