[Bug] Fix torch Compilation Cache Hit Error (#25093)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
2025-09-18 15:38:37 -04:00
parent 75fb112d80
commit d2a30a2d93
2 changed files with 10 additions and 19 deletions
--- a/vllm/config/compilation.py
+++ b/vllm/config/compilation.py
@@ -563,18 +563,6 @@ class CompilationConfig:
                self.cudagraph_mode = CUDAGraphMode.FULL
            self.splitting_ops = []

-        if envs.VLLM_ALL2ALL_BACKEND == "deepep_high_throughput":
-            # exclude MoE dispatch/combine from capture by ensuring
-            # piecewise splitting includes them, so communication remains
-            # outside CUDA graphs while compute can still be graphed.
-            moe_ops = [
-                "vllm.moe_forward",
-                "vllm.moe_forward_shared",
-            ]
-            for op in moe_ops:
-                if op not in self.splitting_ops:
-                    self.splitting_ops.append(op)
-
    def splitting_ops_contain_attention(self) -> bool:
        return self.splitting_ops is not None and all(
            op in self.splitting_ops for op in self._attention_ops)