[torch.compile] avoid Dynamo guard evaluation overhead (#7898)

Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
youkaichao
2024-08-28 16:10:12 -07:00
committed by GitHub
parent 3cdfe1f38b
commit ce6bf3a2cf
9 changed files with 190 additions and 11 deletions

View File

@@ -196,6 +196,10 @@ environment_variables: Dict[str, Callable[[], Any]] = {
# Internal flag to enable Dynamo graph capture.
# Parsed with int(); evaluates to 0 when the variable is unset.
"VLLM_TEST_DYNAMO_GRAPH_CAPTURE":
lambda: int(os.environ.get("VLLM_TEST_DYNAMO_GRAPH_CAPTURE", "0")),
# Boolean flag read from the environment; falls back to "True" when unset.
# Only "true" or "1" (compared case-insensitively) evaluate to True; any
# other value evaluates to False.
# NOTE(review): presumably toggles the custom dispatcher used to avoid
# Dynamo guard evaluation overhead -- confirm against the code that reads it.
"VLLM_DYNAMO_USE_CUSTOM_DISPATCHER":
lambda:
(os.environ.get("VLLM_DYNAMO_USE_CUSTOM_DISPATCHER", "True").lower() in
("true", "1")),
# local rank of the process in the distributed setting, used to determine
# the GPU device id