[Chore] Clean up pytorch helper functions in vllm.utils (#26908)
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
Signed-off-by: isotr0py <2037008807@qq.com>
@@ -24,11 +24,8 @@ from vllm.transformers_utils.detokenizer_utils import convert_ids_list_to_tokens
 from vllm.utils import (
     FlexibleArgumentParser,
     bind_kv_cache,
-    common_broadcastable_dtype,
-    current_stream,
     get_open_port,
     get_tcp_uri,
-    is_lossless_cast,
     join_host_port,
     make_zmq_path,
     make_zmq_socket,
@@ -37,6 +34,11 @@ from vllm.utils import (
     split_zmq_path,
     unique_filepath,
 )
+from vllm.utils.torch_utils import (
+    common_broadcastable_dtype,
+    current_stream,
+    is_lossless_cast,
+)
 
 from vllm.utils.mem_utils import MemorySnapshot, memory_profiling
 from ..utils import create_new_process_for_each_test, flat_product
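Note: downstream code now pulls the torch-specific helpers from the new submodule. Below is a minimal sketch of the updated import in use, assuming the moved helpers keep the signatures the test suite exercises (two dtypes for is_lossless_cast, a collection of dtypes for common_broadcastable_dtype); treat it as illustration, not as the module's documented API.

import torch

from vllm.utils.torch_utils import (
    common_broadcastable_dtype,
    is_lossless_cast,
)

# Assumed semantics: casting float16 -> float32 loses no information.
assert is_lossless_cast(torch.float16, torch.float32)

# Assumed semantics: returns a dtype to which every input dtype can be
# losslessly cast; for fp16 and fp32 that should be fp32.
assert common_broadcastable_dtype([torch.float16, torch.float32]) == torch.float32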
@@ -408,7 +410,7 @@ def test_bind_kv_cache_non_attention():
 
 
 def test_bind_kv_cache_pp():
-    with patch("vllm.utils.cuda_device_count_stateless", lambda: 2):
+    with patch("vllm.utils.torch_utils.cuda_device_count_stateless", lambda: 2):
         # this test runs with 1 GPU, but we simulate 2 GPUs
         cfg = VllmConfig(parallel_config=ParallelConfig(pipeline_parallel_size=2))
         with set_current_vllm_config(cfg):
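The patch target moves with the helper because unittest.mock.patch replaces the attribute on the module named in the target string; patching the old vllm.utils path would no longer intercept lookups that now resolve through vllm.utils.torch_utils. A minimal sketch of the mechanism follows (it assumes vllm is importable and that cuda_device_count_stateless remains the zero-argument helper this diff relocates):

from unittest.mock import patch

import vllm.utils.torch_utils as torch_utils

# Simulate a 2-GPU box on a single-GPU (or CPU-only) machine, as the test
# above does: the stub is installed on the module that now owns the helper,
# so any code resolving the name through that module sees the stub.
with patch("vllm.utils.torch_utils.cuda_device_count_stateless", lambda: 2):
    assert torch_utils.cuda_device_count_stateless() == 2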