[XPU] Disable xpu graph by default (#38193)
Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
This commit is contained in:
@@ -247,6 +247,7 @@ if TYPE_CHECKING:
|
||||
VLLM_ELASTIC_EP_DRAIN_REQUESTS: bool = False
|
||||
VLLM_MEMORY_PROFILER_ESTIMATE_CUDAGRAPHS: bool = False
|
||||
VLLM_NIXL_EP_MAX_NUM_RANKS: int = 32
|
||||
VLLM_XPU_ENABLE_XPU_GRAPH: bool = False
|
||||
|
||||
|
||||
def get_default_cache_root():
|
||||
@@ -1648,6 +1649,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
|
||||
"VLLM_NIXL_EP_MAX_NUM_RANKS": lambda: int(
|
||||
os.getenv("VLLM_NIXL_EP_MAX_NUM_RANKS", "32")
|
||||
),
|
||||
# Whether to enable XPU Graph on Intel GPUs (disabled by default; set to 1 to enable)
|
||||
"VLLM_XPU_ENABLE_XPU_GRAPH": lambda: bool(
|
||||
int(os.getenv("VLLM_XPU_ENABLE_XPU_GRAPH", "0"))
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -12,6 +12,7 @@ import vllm_xpu_kernels._C # noqa
|
||||
import vllm_xpu_kernels._moe_C # noqa
|
||||
import vllm_xpu_kernels._xpu_C # noqa
|
||||
|
||||
import vllm.envs as envs
|
||||
from vllm.logger import init_logger
|
||||
from vllm.utils.torch_utils import supports_xpu_graph
|
||||
from vllm.v1.attention.backends.registry import AttentionBackendEnum
|
||||
@@ -181,6 +182,12 @@ class XPUPlatform(Platform):
|
||||
"XPU Graph is not supported in the current PyTorch version, "
|
||||
"disabling cudagraph_mode."
|
||||
)
|
||||
elif not envs.VLLM_XPU_ENABLE_XPU_GRAPH:
|
||||
compilation_config.cudagraph_mode = CUDAGraphMode.NONE
|
||||
logger.warning(
|
||||
"XPU Graph is disabled by environment variable, "
|
||||
"please set VLLM_XPU_ENABLE_XPU_GRAPH=1 to enable it."
|
||||
)
|
||||
elif parallel_config.world_size_across_dp > 1:
|
||||
compilation_config.cudagraph_mode = CUDAGraphMode.NONE
|
||||
logger.warning(
|
||||
|
||||
Reference in New Issue
Block a user