vllm/v1/worker/xpu_model_runner.py

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from contextlib import contextmanager
from typing import TYPE_CHECKING

import torch

from vllm.config import VllmConfig
from vllm.logger import init_logger
from vllm.utils.torch_utils import supports_xpu_graph
from vllm.v1.worker.gpu.model_runner import (
    GPUModelRunner as GPUModelRunnerV2,
)
from vllm.v1.worker.gpu_model_runner import GPUModelRunner

if TYPE_CHECKING:
    pass

logger = init_logger(__name__)


class XPUModelRunner(GPUModelRunner):
    """Model runner for XPU devices, layered on top of ``GPUModelRunner``.

    The base-class constructor is executed inside ``_torch_cuda_wrapper()``,
    so the ``torch.cuda`` attributes that helper rebinds already point at
    their XPU counterparts while the parent initialises itself.
    """

    def __init__(self, vllm_config: VllmConfig, device: torch.device) -> None:
        """Initialise the runner for ``device`` using ``vllm_config``.

        Args:
            vllm_config: Configuration object forwarded unchanged to the
                parent constructor.
            device: Torch device forwarded unchanged to the parent
                constructor.
        """
        # Run the parent constructor with torch.cuda redirected to XPU.
        with _torch_cuda_wrapper():
            super().__init__(vllm_config, device)

        # FIXME: To be verified. Cascade attention is switched off here,
        # overriding whatever the parent constructor set.
        self.cascade_attn_enabled = False


class XPUModelRunnerV2(GPUModelRunnerV2):
    """Model runner for XPU devices, layered on top of ``GPUModelRunnerV2``.

    The base-class constructor is executed inside ``_torch_cuda_wrapper()``,
    so the ``torch.cuda`` attributes that helper rebinds already point at
    their XPU counterparts while the parent initialises itself.
    """

    def __init__(self, vllm_config: VllmConfig, device: torch.device) -> None:
        """Initialise the runner for ``device`` using ``vllm_config``.

        Args:
            vllm_config: Configuration object forwarded unchanged to the
                parent constructor.
            device: Torch device forwarded unchanged to the parent
                constructor.
        """
        # Run the parent constructor with torch.cuda redirected to XPU.
        with _torch_cuda_wrapper():
            super().__init__(vllm_config, device)


@contextmanager
def _torch_cuda_wrapper():
    """Redirect selected ``torch.cuda`` attributes to XPU implementations.

    On entry, the stream, event and memory-query attributes of
    ``torch.cuda`` listed below are rebound to ``torch.xpu`` objects (or to
    ``torch.Event``). The graph-related attributes are rebound only when
    ``supports_xpu_graph()`` returns a truthy value.

    Nothing is restored on exit: the rebinding remains in effect after the
    ``with`` block ends, and entering the context again simply reassigns the
    same attributes.
    """
    xpu = torch.xpu
    cuda = torch.cuda

    # Stream handling. Note that cuda.default_stream is pointed at
    # xpu.current_stream, the same target as cuda.current_stream.
    cuda.Stream = xpu.Stream
    cuda.default_stream = xpu.current_stream
    cuda.current_stream = xpu.current_stream
    cuda.stream = xpu.stream

    # Memory query, events and stream selection.
    cuda.mem_get_info = xpu.mem_get_info
    cuda.Event = torch.Event
    cuda.set_stream = xpu.set_stream

    # Graph capture APIs are only touched when the helper reports support,
    # so the torch.xpu graph attributes are not looked up otherwise.
    if supports_xpu_graph():
        cuda.graph = xpu.graph
        cuda.CUDAGraph = xpu.XPUGraph
        cuda.graph_pool_handle = xpu.graph_pool_handle

    yield
[Hardware][Intel GPU] Add v1 Intel GPU support with Flash attention backend. (#19560) Signed-off-by: Kunshang Ji <kunshang.ji@intel.com> 2025-06-27 00:27:18 +08:00			`# SPDX-License-Identifier: Apache-2.0`
[Misc] Add SPDX-FileCopyrightText (#20428) Signed-off-by: Jee Jee Li <pandaleefree@gmail.com> 2025-07-04 15:40:42 +08:00			`# SPDX-FileCopyrightText: Copyright contributors to the vLLM project`
[XPU]fix cuda event used in XPU model runner (#23708) Signed-off-by: Kunshang Ji <kunshang.ji@intel.com> 2025-08-27 15:27:14 +08:00			`from contextlib import contextmanager`
[Hardware][Intel GPU] Add v1 Intel GPU support with Flash attention backend. (#19560) Signed-off-by: Kunshang Ji <kunshang.ji@intel.com> 2025-06-27 00:27:18 +08:00			`from typing import TYPE_CHECKING`

			`import torch`

			`from vllm.config import VllmConfig`
			`from vllm.logger import init_logger`
[XPU]Support CUDAGraph on XPU Platform (#34482) Signed-off-by: Xinyu Chen <xinyu1.chen@intel.com> Co-authored-by: chzhang <chaojun.zhang@intel.com> Co-authored-by: zhenwei-intel <zhenwei.liu@intel.com> Co-authored-by: Kunshang Ji <kunshang.ji@intel.com> 2026-02-25 14:22:52 +08:00			`from vllm.utils.torch_utils import supports_xpu_graph`
[XPU] Enable ModelRunnerV2 on XPU (#36078) Signed-off-by: Xinyu Chen <xinyu1.chen@intel.com> 2026-03-06 01:19:18 +08:00			`from vllm.v1.worker.gpu.model_runner import (`
			`GPUModelRunner as GPUModelRunnerV2,`
			`)`
[Hardware][Intel GPU] Add v1 Intel GPU support with Flash attention backend. (#19560) Signed-off-by: Kunshang Ji <kunshang.ji@intel.com> 2025-06-27 00:27:18 +08:00			`from vllm.v1.worker.gpu_model_runner import GPUModelRunner`

			`if TYPE_CHECKING:`
			`pass`

			`logger = init_logger(__name__)`


			`class XPUModelRunner(GPUModelRunner):`
			`"""A model runner for XPU devices."""`

			`def __init__(`
			`self,`
			`vllm_config: VllmConfig,`
			`device: torch.device,`
			`):`
[XPU]fix cuda event used in XPU model runner (#23708) Signed-off-by: Kunshang Ji <kunshang.ji@intel.com> 2025-08-27 15:27:14 +08:00			`with _torch_cuda_wrapper():`
			`super().__init__(vllm_config, device)`
[Hardware][Intel GPU] Add v1 Intel GPU support with Flash attention backend. (#19560) Signed-off-by: Kunshang Ji <kunshang.ji@intel.com> 2025-06-27 00:27:18 +08:00			`# FIXME: To be verified.`
			`self.cascade_attn_enabled = False`

[XPU]fix cuda event used in XPU model runner (#23708) Signed-off-by: Kunshang Ji <kunshang.ji@intel.com> 2025-08-27 15:27:14 +08:00
[XPU] Enable ModelRunnerV2 on XPU (#36078) Signed-off-by: Xinyu Chen <xinyu1.chen@intel.com> 2026-03-06 01:19:18 +08:00			`class XPUModelRunnerV2(GPUModelRunnerV2):`
			`"""A model runner for XPU devices."""`

			`def __init__(`
			`self,`
			`vllm_config: VllmConfig,`
			`device: torch.device,`
			`):`
			`with _torch_cuda_wrapper():`
			`super().__init__(vllm_config, device)`


[XPU]fix cuda event used in XPU model runner (#23708) Signed-off-by: Kunshang Ji <kunshang.ji@intel.com> 2025-08-27 15:27:14 +08:00			`@contextmanager`
			`def _torch_cuda_wrapper():`
			`try:`
[XPU] Fix xpu model runner call torch.cuda APIs (#25011) Signed-off-by: Kunshang Ji <kunshang.ji@intel.com> 2025-09-17 14:45:25 +08:00			`# replace cuda APIs with xpu APIs, this should work by default`
			`torch.cuda.Stream = torch.xpu.Stream`
			`torch.cuda.default_stream = torch.xpu.current_stream`
			`torch.cuda.current_stream = torch.xpu.current_stream`
			`torch.cuda.stream = torch.xpu.stream`
[XPU]Support CUDAGraph on XPU Platform (#34482) Signed-off-by: Xinyu Chen <xinyu1.chen@intel.com> Co-authored-by: chzhang <chaojun.zhang@intel.com> Co-authored-by: zhenwei-intel <zhenwei.liu@intel.com> Co-authored-by: Kunshang Ji <kunshang.ji@intel.com> 2026-02-25 14:22:52 +08:00			`torch.cuda.mem_get_info = torch.xpu.mem_get_info`
[XPU] Enable ModelRunnerV2 on XPU (#36078) Signed-off-by: Xinyu Chen <xinyu1.chen@intel.com> 2026-03-06 01:19:18 +08:00			`torch.cuda.Event = torch.Event`
			`torch.cuda.set_stream = torch.xpu.set_stream`
[XPU]Support CUDAGraph on XPU Platform (#34482) Signed-off-by: Xinyu Chen <xinyu1.chen@intel.com> Co-authored-by: chzhang <chaojun.zhang@intel.com> Co-authored-by: zhenwei-intel <zhenwei.liu@intel.com> Co-authored-by: Kunshang Ji <kunshang.ji@intel.com> 2026-02-25 14:22:52 +08:00			`if supports_xpu_graph():`
			`torch.cuda.graph = torch.xpu.graph`
			`torch.cuda.CUDAGraph = torch.xpu.XPUGraph`
[XPU] Enable ModelRunnerV2 on XPU (#36078) Signed-off-by: Xinyu Chen <xinyu1.chen@intel.com> 2026-03-06 01:19:18 +08:00			`torch.cuda.graph_pool_handle = torch.xpu.graph_pool_handle`
[XPU]fix cuda event used in XPU model runner (#23708) Signed-off-by: Kunshang Ji <kunshang.ji@intel.com> 2025-08-27 15:27:14 +08:00			`yield`
			`finally:`
Replace `torch.cuda.Event` with `torch.Event` for better hardware compatibility (#26985) Signed-off-by: Kunshang Ji <kunshang.ji@intel.com> 2025-11-19 03:34:36 +08:00			`pass`