[torch.compile] Fine-grained CustomOp enabling mechanism (#9300)
This commit is contained in:
13
vllm/envs.py
13
vllm/envs.py
@@ -65,6 +65,7 @@ if TYPE_CHECKING:
|
||||
VLLM_ALLOW_RUNTIME_LORA_UPDATING: bool = False
|
||||
VLLM_SKIP_P2P_CHECK: bool = False
|
||||
VLLM_TORCH_COMPILE_LEVEL: int = 0
|
||||
VLLM_CUSTOM_OPS: List[str] = []
|
||||
VLLM_DISABLED_KERNELS: List[str] = []
|
||||
|
||||
|
||||
@@ -205,7 +206,17 @@ environment_variables: Dict[str, Callable[[], Any]] = {
|
||||
os.environ.get("VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE", "1") != "0"),
|
||||
"VLLM_TORCH_COMPILE_LEVEL":
|
||||
lambda: int(os.environ.get("VLLM_TORCH_COMPILE_LEVEL", "0")),
|
||||
|
||||
# Fine-grained control over which custom ops to enable/disable (comma-separated list; spaces are ignored).
|
||||
# Use 'all' to enable all, 'none' to disable all.
|
||||
# You can also list individual custom op names to enable (prefixed with a '+'),
|
||||
# or disable (prefixed with a '-').
|
||||
# Examples:
|
||||
# - 'all,-op1' to enable all except op1
|
||||
# - 'none,+op1,+op2' to enable only op1 and op2
|
||||
# By default, all custom ops are enabled when running without Inductor
|
||||
# and disabled when running with Inductor (compile_level >= Inductor).
|
||||
"VLLM_CUSTOM_OPS":
|
||||
lambda: os.environ.get("VLLM_CUSTOM_OPS", "").replace(" ", "").split(","),
|
||||
# local rank of the process in the distributed setting, used to determine
|
||||
# the GPU device id
|
||||
"LOCAL_RANK":
|
||||
|
||||
Reference in New Issue
Block a user