[Bugfix][ROCm][MoE] Fix mxfp4 oracle regressions from #37128 (#37787)

Signed-off-by: Andreas Karatzas <akaratza@amd.com>
This commit is contained in:
Andreas Karatzas
2026-03-24 19:17:33 -05:00
committed by GitHub
parent 8bbb7c7f20
commit 679c6a3ecc
11 changed files with 69 additions and 15 deletions

View File

@@ -5,6 +5,7 @@ import pytest
import vllm
from vllm.lora.request import LoRARequest
from vllm.platforms import current_platform
from ..utils import multi_gpu_test
@@ -69,6 +70,16 @@ def generate_and_test(llm: vllm.LLM, lora_path: str, lora_id: int) -> None:
assert generated_texts[i].startswith(EXPECTED_LORA_OUTPUT[i])
@pytest.mark.skipif(
not current_platform.is_cuda(),
reason=(
"Mxfp4 LoRA on ROCm is blocked by a spawn compatibility issue. "
"The fused_moe_lora Triton kernel crashes in spawned subprocesses, "
"and vLLM forces spawn mode when HIP is initialized before "
"multiprocessing. Fixing this requires either making the LoRA "
"Triton kernel spawn-safe or pre-warming the kernel cache."
),
)
@pytest.mark.parametrize("mxfp4_use_marlin", [True, False])
@pytest.mark.parametrize("specialize_active_lora", [True, False])
def test_gpt_oss_lora(