diff --git a/vllm/model_executor/layers/quantization/utils/flashinfer_utils.py b/vllm/model_executor/layers/quantization/utils/flashinfer_utils.py index a9b30b780..799854479 100644 --- a/vllm/model_executor/layers/quantization/utils/flashinfer_utils.py +++ b/vllm/model_executor/layers/quantization/utils/flashinfer_utils.py @@ -264,6 +264,7 @@ def is_flashinfer_supporting_global_sf(backend: FlashinferMoeBackend | None) -> backends_supporting_global_sf = ( FlashinferMoeBackend.CUTLASS, FlashinferMoeBackend.TENSORRT_LLM, + FlashinferMoeBackend.CUTEDSL, ) return backend in backends_supporting_global_sf