[Chore] Clean up pytorch helper functions in vllm.utils (#26908)

Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
Signed-off-by: isotr0py <2037008807@qq.com>
This commit is contained in:
Isotr0py
2025-10-19 00:48:22 +08:00
committed by GitHub
parent 5c2acb270a
commit 6ac5e06f7c
119 changed files with 772 additions and 714 deletions

View File

@@ -24,11 +24,8 @@ from vllm.transformers_utils.detokenizer_utils import convert_ids_list_to_tokens
from vllm.utils import (
FlexibleArgumentParser,
bind_kv_cache,
common_broadcastable_dtype,
current_stream,
get_open_port,
get_tcp_uri,
is_lossless_cast,
join_host_port,
make_zmq_path,
make_zmq_socket,
@@ -37,6 +34,11 @@ from vllm.utils import (
split_zmq_path,
unique_filepath,
)
from vllm.utils.torch_utils import (
common_broadcastable_dtype,
current_stream,
is_lossless_cast,
)
from vllm.utils.mem_utils import MemorySnapshot, memory_profiling
from ..utils import create_new_process_for_each_test, flat_product
@@ -408,7 +410,7 @@ def test_bind_kv_cache_non_attention():
def test_bind_kv_cache_pp():
with patch("vllm.utils.cuda_device_count_stateless", lambda: 2):
with patch("vllm.utils.torch_utils.cuda_device_count_stateless", lambda: 2):
# this test runs with 1 GPU, but we simulate 2 GPUs
cfg = VllmConfig(parallel_config=ParallelConfig(pipeline_parallel_size=2))
with set_current_vllm_config(cfg):