[core] platform agnostic executor via collective_rpc (#11256)

Signed-off-by: youkaichao <youkaichao@gmail.com>
2025-01-15 13:45:21 +08:00
parent f218f9c24d
commit ad34c0df0f
43 changed files with 851 additions and 2641 deletions
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -2,6 +2,7 @@ import asyncio
 import os
 import socket
 from typing import AsyncIterator, Tuple
+from unittest.mock import patch

 import pytest
 import torch
@@ -390,7 +391,10 @@ def test_bind_kv_cache_encoder_decoder():


 def test_bind_kv_cache_pp():
-    cfg = VllmConfig(parallel_config=ParallelConfig(pipeline_parallel_size=2))
+    with patch("vllm.utils.cuda_device_count_stateless", lambda: 2):
+        # this test runs with 1 GPU, but we simulate 2 GPUs
+        cfg = VllmConfig(
+            parallel_config=ParallelConfig(pipeline_parallel_size=2))
    with set_current_vllm_config(cfg):
        from vllm.attention import Attention