[Chore] Separate out optional dependency checks from vllm.utils (#27207)

Signed-off-by: dongbo910220 <1275604947@qq.com>
Signed-off-by: dongbo910220 <32610838+dongbo910220@users.noreply.github.com>
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
This commit is contained in:
dongbo910220
2025-10-22 22:44:21 +08:00
committed by GitHub
parent 49c00fe304
commit 3ae082c373
22 changed files with 60 additions and 64 deletions

View File

@@ -18,12 +18,12 @@ from vllm.model_executor.layers.quantization.utils.fp8_utils import (
w8a8_triton_block_scaled_mm,
)
from vllm.platforms import current_platform
from vllm.utils import has_deep_gemm
from vllm.utils.deep_gemm import (
fp8_gemm_nt,
get_col_major_tma_aligned_tensor,
per_block_cast_to_fp8,
)
from vllm.utils.import_utils import has_deep_gemm
if current_platform.get_device_capability() < (9, 0):
pytest.skip("FP8 Triton requires CUDA 9.0 or higher", allow_module_level=True)