import os

import torch

import vllm.envs as envs
from vllm.compilation.levels import CompilationLevel
from vllm.plugins import set_torch_compile_backend

from .interface import Platform, PlatformEnum
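
# Default to CompilationLevel.DYNAMO_ONCE when the user has not set a
# compilation level explicitly.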
if "VLLM_TORCH_COMPILE_LEVEL" not in os.environ:
    os.environ["VLLM_TORCH_COMPILE_LEVEL"] = str(CompilationLevel.DYNAMO_ONCE)
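
# PIECEWISE (and higher) compilation levels rely on Inductor, which is not
# supported on TPU.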
assert envs.VLLM_TORCH_COMPILE_LEVEL < CompilationLevel.PIECEWISE,\
    "TPU does not support Inductor."
set_torch_compile_backend("openxla")


class TpuPlatform(Platform):
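    """Platform implementation for TPU devices."""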
    _enum = PlatformEnum.TPU

    @classmethod
    def get_device_name(cls, device_id: int = 0) -> str:
        raise NotImplementedError

    @classmethod
    def get_device_total_memory(cls, device_id: int = 0) -> int:
        raise NotImplementedError
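
    # Use torch.no_grad instead of the default torch.inference_mode, which
    # is not supported on TPU.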
    @classmethod
    def inference_mode(cls):
        return torch.no_grad()