[torch.compile] rework compile control with piecewise cudagraph (#9715)

Signed-off-by: youkaichao <youkaichao@gmail.com>
2024-10-29 23:03:49 -07:00
parent 7b0365efef
commit ff5ed6e1bc
17 changed files with 979 additions and 102 deletions
--- a/vllm/model_executor/custom_op.py
+++ b/vllm/model_executor/custom_op.py
@@ -100,7 +100,7 @@ class CustomOp(nn.Module):

        return (CustomOp.default_on() or enabled) and not disabled

-    # On by default if VLLM_TORCH_COMPILE_LEVEL < CompilationLevel.INDUCTOR
+    # On by default if VLLM_TORCH_COMPILE_LEVEL < CompilationLevel.PIECEWISE
    # Specifying 'all' or 'none' in VLLM_CUSTOM_OPS takes precedence.
    @staticmethod
    @lru_cache()
@@ -108,7 +108,7 @@ class CustomOp(nn.Module):
        count_none = envs.VLLM_CUSTOM_OPS.count("none")
        count_all = envs.VLLM_CUSTOM_OPS.count("all")
        assert count_none + count_all <= 1, "Can only specify 'none' or 'all'"
-        return envs.VLLM_TORCH_COMPILE_LEVEL < CompilationLevel.INDUCTOR and \
+        return envs.VLLM_TORCH_COMPILE_LEVEL < CompilationLevel.PIECEWISE and \
            not count_none > 0 or count_all > 0

    # Dictionary of all custom ops (classes, indexed by registered name).