[Chore] Separate out optional dependency checks from vllm.utils (#27207)

Signed-off-by: dongbo910220 <1275604947@qq.com>
Signed-off-by: dongbo910220 <32610838+dongbo910220@users.noreply.github.com>
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
2025-10-22 22:44:21 +08:00
parent 49c00fe304
commit 3ae082c373
22 changed files with 60 additions and 64 deletions
--- a/tests/kernels/quantization/test_block_fp8.py
+++ b/tests/kernels/quantization/test_block_fp8.py
@@ -18,12 +18,12 @@ from vllm.model_executor.layers.quantization.utils.fp8_utils import (
    w8a8_triton_block_scaled_mm,
 )
 from vllm.platforms import current_platform
-from vllm.utils import has_deep_gemm
 from vllm.utils.deep_gemm import (
    fp8_gemm_nt,
    get_col_major_tma_aligned_tensor,
    per_block_cast_to_fp8,
 )
+from vllm.utils.import_utils import has_deep_gemm

 if current_platform.get_device_capability() < (9, 0):
    pytest.skip("FP8 Triton requires CUDA 9.0 or higher", allow_module_level=True)