[CI/Build] Avoid CUDA initialization (#8534)

2024-09-18 18:38:11 +08:00
parent e351572900
commit 6ffa3f314c
55 changed files with 256 additions and 256 deletions
--- a/vllm/utils.py
+++ b/vllm/utils.py
@@ -5,6 +5,7 @@ import datetime
 import enum
 import gc
 import os
+import random
 import socket
 import subprocess
 import sys
@@ -32,6 +33,7 @@ from typing_extensions import ParamSpec, TypeIs, assert_never

 import vllm.envs as envs
 from vllm.logger import enable_trace_function_call, init_logger
+from vllm.platforms import current_platform

 logger = init_logger(__name__)

@@ -373,6 +375,22 @@ def get_cpu_memory() -> int:
    return psutil.virtual_memory().total


+def seed_everything(seed: int) -> None:
+    """
+    Set the seed of each random module.
+
+    Loosely based on: https://github.com/Lightning-AI/pytorch-lightning/blob/2.4.0/src/lightning/fabric/utilities/seed.py#L20
+    """
+    random.seed(seed)
+    np.random.seed(seed)
+
+    if current_platform.is_cuda_alike():
+        torch.cuda.manual_seed_all(seed)
+
+    if is_xpu():
+        torch.xpu.manual_seed_all(seed)
+
+
 def random_uuid() -> str:
    return str(uuid.uuid4().hex)

@@ -634,9 +652,7 @@ def create_kv_caches_with_random_flash(
    seed: int = 0,
    device: Optional[str] = "cuda",
 ) -> Tuple[List[torch.Tensor], List[torch.Tensor]]:
-    torch.random.manual_seed(seed)
-    if torch.cuda.is_available():
-        torch.cuda.manual_seed(seed)
+    seed_everything(seed)

    torch_dtype = get_kv_cache_torch_dtype(cache_dtype, model_dtype)
    key_value_cache_shape = (num_blocks, 2, block_size, num_heads, head_size)
@@ -678,9 +694,7 @@ def create_kv_caches_with_random(
            f"Does not support key cache of type fp8 with head_size {head_size}"
        )

-    torch.random.manual_seed(seed)
-    if torch.cuda.is_available():
-        torch.cuda.manual_seed(seed)
+    seed_everything(seed)

    torch_dtype = get_kv_cache_torch_dtype(cache_dtype, model_dtype)

@@ -750,7 +764,7 @@ class CudaMemoryProfiler:

    def current_memory_usage(self) -> float:
        # Return the memory usage in bytes.
-        if torch.cuda.is_available():
+        if current_platform.is_cuda_alike():
            torch.cuda.reset_peak_memory_stats(self.device)
            mem = torch.cuda.max_memory_allocated(self.device)
        elif is_xpu():