Enable modelopt gemma3 nvfp4/fp8, make workflow more robust (#22771)

Signed-off-by: Zhiyu Cheng <zhiyuc@nvidia.com>
Signed-off-by: Michael Goin <mgoin64@gmail.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
This commit is contained in:
Zhiyu
2025-09-19 15:40:33 -07:00
committed by GitHub
parent 711e912946
commit 431535b522
7 changed files with 82 additions and 22 deletions

View File

@@ -11,7 +11,8 @@ import pytest
import torch
import vllm.model_executor.layers.fused_moe.modular_kernel as mk
from vllm.config import VllmConfig, current_platform, set_current_vllm_config
from vllm.config import VllmConfig, set_current_vllm_config
from vllm.platforms import current_platform
from vllm.utils import has_deep_ep, has_deep_gemm, has_pplx
from vllm.utils.flashinfer import has_flashinfer_cutlass_fused_moe