From f9e2a75a1ee1d339ec5d885f842b3cfc27d71e02 Mon Sep 17 00:00:00 2001 From: jiahanc <173873397+jiahanc@users.noreply.github.com> Date: Fri, 9 Jan 2026 12:03:02 -0800 Subject: [PATCH] [fix] add cutedsl to global sf (#32001) Signed-off-by: jiahanc <173873397+jiahanc@users.noreply.github.com> --- .../model_executor/layers/quantization/utils/flashinfer_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vllm/model_executor/layers/quantization/utils/flashinfer_utils.py b/vllm/model_executor/layers/quantization/utils/flashinfer_utils.py index a9b30b780..799854479 100644 --- a/vllm/model_executor/layers/quantization/utils/flashinfer_utils.py +++ b/vllm/model_executor/layers/quantization/utils/flashinfer_utils.py @@ -264,6 +264,7 @@ def is_flashinfer_supporting_global_sf(backend: FlashinferMoeBackend | None) -> backends_supporting_global_sf = ( FlashinferMoeBackend.CUTLASS, FlashinferMoeBackend.TENSORRT_LLM, + FlashinferMoeBackend.CUTEDSL, ) return backend in backends_supporting_global_sf