[Hardware][AMD][CI/Build][Doc] Upgrade to ROCm 6.1, Dockerfile improvements, test fixes (#5422)

This commit is contained in:
Matt Wong
2024-06-25 17:56:15 -05:00
committed by GitHub
parent bc34937d68
commit dd793d1de5
15 changed files with 257 additions and 120 deletions

View File

@@ -1,8 +1,8 @@
import os
import ray
from vllm.utils import cuda_device_count_stateless
import vllm.envs as envs
from vllm.utils import (cuda_device_count_stateless, is_hip,
update_environment_variables)
@ray.remote
@@ -12,16 +12,21 @@ class _CUDADeviceCountStatelessTestActor:
return cuda_device_count_stateless()
# Method of the test actor: publish `cuda_visible_devices` under the
# CUDA_VISIBLE_DEVICES key.
# NOTE(review): this text looks like a diff rendered without +/- markers and
# with indentation stripped. The direct os.environ write below is presumably
# the removed line and the update_environment_variables(...) call its
# replacement -- confirm against the real file before assuming both run.
def set_cuda_visible_devices(self, cuda_visible_devices: str):
os.environ["CUDA_VISIBLE_DEVICES"] = cuda_visible_devices
update_environment_variables(
{"CUDA_VISIBLE_DEVICES": cuda_visible_devices})
# Method of the test actor: report the CUDA_VISIBLE_DEVICES value.
# NOTE(review): two consecutive return statements appear here. As written,
# the os.environ lookup returns first and the envs.CUDA_VISIBLE_DEVICES line
# is never reached. Presumably the first is the pre-change line of a diff and
# the second its replacement -- confirm against the real file.
def get_cuda_visible_devices(self):
return os.environ["CUDA_VISIBLE_DEVICES"]
return envs.CUDA_VISIBLE_DEVICES
def test_cuda_device_count_stateless():
"""Test that cuda_device_count_stateless changes return value if
CUDA_VISIBLE_DEVICES is changed."""
if is_hip():
# Set HIP_VISIBLE_DEVICES == CUDA_VISIBLE_DEVICES. Conversion
# is handled by `update_environment_variables`
update_environment_variables(
{"CUDA_VISIBLE_DEVICES": envs.CUDA_VISIBLE_DEVICES})
actor = _CUDADeviceCountStatelessTestActor.options( # type: ignore
num_gpus=2).remote()
assert sorted(ray.get(