[Bugfix] Lazy import gpt_oss_triton_kernels_moe for mxfp4 (#23678)

Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
Michael Goin
2025-08-26 21:34:57 -04:00
committed by GitHub
parent eb1995167e
commit de02b07db4

View File

@@ -10,8 +10,6 @@ from vllm.config import get_current_vllm_config
from vllm.logger import init_logger
from vllm.model_executor.layers.fused_moe import (FusedMoE, FusedMoEConfig,
FusedMoEMethodBase)
from vllm.model_executor.layers.fused_moe.gpt_oss_triton_kernels_moe import (
triton_kernel_moe_forward)
from vllm.model_executor.layers.linear import (LinearBase,
UnquantizedLinearMethod)
from vllm.model_executor.layers.quantization import QuantizationMethods
@@ -557,6 +555,8 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
)[0]
return trtllm_gen_output
else:
from vllm.model_executor.layers.fused_moe.gpt_oss_triton_kernels_moe import ( # noqa: E501
triton_kernel_moe_forward)
return triton_kernel_moe_forward(
hidden_states=x,
w1=self.w13_weight_triton_tensor,