[torch.compile] Undo the fast_moe_cold_start hack in torch>=2.11 (#35475)

Signed-off-by: Richard Zou <zou3519@gmail.com>
2026-03-01 16:44:22 -05:00
parent 6290470843
commit e82fbeec7b
4 changed files with 109 additions and 10 deletions
--- a/vllm/config/vllm.py
+++ b/vllm/config/vllm.py
@@ -883,7 +883,13 @@ class VllmConfig:
                    self.compilation_config.pass_config.enable_sp = False
                    self.compilation_config.pass_config.fuse_gemm_comms = False

-        if self.compilation_config.fast_moe_cold_start is None:
+        from vllm.utils.torch_utils import HAS_OPAQUE_TYPE
+
+        if HAS_OPAQUE_TYPE:
+            # On torch >= 2.11 the hoisted OpaqueObject approach supersedes
+            # fast_moe_cold_start, so force it off.
+            self.compilation_config.fast_moe_cold_start = False
+        elif self.compilation_config.fast_moe_cold_start is None:
            # resolve default behavior: try to be as safe as possible
            # this config is unsafe if any spec decoding draft model has a MOE.
            # We'll conservatively turn it off if we see spec decoding.