[core] platform agnostic executor via collective_rpc (#11256)

Signed-off-by: youkaichao <youkaichao@gmail.com>
This commit is contained in:
youkaichao
2025-01-15 13:45:21 +08:00
committed by GitHub
parent f218f9c24d
commit ad34c0df0f
43 changed files with 851 additions and 2641 deletions

View File

@@ -2,6 +2,7 @@ import asyncio
import os
import socket
from typing import AsyncIterator, Tuple
from unittest.mock import patch
import pytest
import torch
@@ -390,7 +391,10 @@ def test_bind_kv_cache_encoder_decoder():
def test_bind_kv_cache_pp():
cfg = VllmConfig(parallel_config=ParallelConfig(pipeline_parallel_size=2))
with patch("vllm.utils.cuda_device_count_stateless", lambda: 2):
# this test runs with 1 GPU, but we simulate 2 GPUs
cfg = VllmConfig(
parallel_config=ParallelConfig(pipeline_parallel_size=2))
with set_current_vllm_config(cfg):
from vllm.attention import Attention