[Fix Bug] num_active_loras always equals zero (#34119)

Signed-off-by: Runkai Tao <rt572@physics.rutgers.edu>
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
Runkai Tao
2026-03-02 10:17:46 -05:00
committed by GitHub
parent 7e9149d9a9
commit ada4f4fadd
7 changed files with 58 additions and 29 deletions

View File

@@ -70,8 +70,12 @@ def generate_and_test(llm: vllm.LLM, lora_path: str, lora_id: int) -> None:
@pytest.mark.parametrize("mxfp4_use_marlin", [True, False])
@pytest.mark.parametrize("specialize_active_lora", [True, False])
def test_gpt_oss_lora(
monkeypatch: pytest.MonkeyPatch, gptoss20b_lora_files, mxfp4_use_marlin
monkeypatch: pytest.MonkeyPatch,
gptoss20b_lora_files,
mxfp4_use_marlin,
specialize_active_lora,
):
with monkeypatch.context() as m:
m.setenv("VLLM_MXFP4_USE_MARLIN", "1" if mxfp4_use_marlin else "0")
@@ -83,6 +87,7 @@ def test_gpt_oss_lora(
max_lora_rank=8,
max_num_seqs=2,
max_num_batched_tokens=2048,
specialize_active_lora=specialize_active_lora,
compilation_config=vllm.config.CompilationConfig( # Avoid OOM
cudagraph_specialize_lora=False,
),