[torch.compile] rework compile control with piecewise cudagraph (#9715)

Signed-off-by: youkaichao <youkaichao@gmail.com>
2024-10-29 23:03:49 -07:00
parent 7b0365efef
commit ff5ed6e1bc
17 changed files with 979 additions and 102 deletions
--- a/vllm/platforms/tpu.py
+++ b/vllm/platforms/tpu.py
@@ -11,7 +11,7 @@ from .interface import Platform, PlatformEnum
 if "VLLM_TORCH_COMPILE_LEVEL" not in os.environ:
    os.environ["VLLM_TORCH_COMPILE_LEVEL"] = str(CompilationLevel.DYNAMO_ONCE)

-assert envs.VLLM_TORCH_COMPILE_LEVEL < CompilationLevel.INDUCTOR,\
+assert envs.VLLM_TORCH_COMPILE_LEVEL < CompilationLevel.PIECEWISE,\
     "TPU does not support Inductor."

 set_torch_compile_backend("openxla")