[Bug] Fix "vLLM config is not set" error (#29999)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
Wentao Ye
2025-12-05 16:42:12 -05:00
committed by GitHub
parent 77e4472809
commit 7b5575fa7d
5 changed files with 47 additions and 27 deletions

View File

@@ -247,6 +247,11 @@ def flashinfer_cutlass_moe_fp8(
assert quant_config is not None
# Construct modular kernel with block-scale support when requested.
parallel_config = getattr(
getattr(layer, "vllm_config", None),
"parallel_config",
None,
)
fused_experts = mk.FusedMoEModularKernel(
build_flashinfer_fp8_cutlass_moe_prepare_finalize(
moe=moe, use_deepseek_fp8_block_scale=use_deepseek_fp8_block_scale
@@ -257,6 +262,7 @@ def flashinfer_cutlass_moe_fp8(
out_dtype=hidden_states.dtype,
use_deepseek_fp8_block_scale=use_deepseek_fp8_block_scale,
),
parallel_config=parallel_config,
)
return fused_experts(