[Log] Optimize startup log (#28948)
Signed-off-by: yewentao256 <zhyanwentao@126.com>
Signed-off-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Co-authored-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
@@ -162,9 +162,11 @@ def get_fp8_moe_backend(block_quant: bool) -> Fp8MoeBackend:
     # deepGEMM on supported platforms with block-quantized weights
     if envs.VLLM_USE_DEEP_GEMM and envs.VLLM_MOE_USE_DEEP_GEMM and block_quant:
         if not has_deep_gemm():
-            logger.warning_once("DeepGEMM backend requested but not available.")
+            logger.warning_once(
+                "DeepGEMM backend requested but not available.", scope="local"
+            )
         elif is_deep_gemm_supported():
-            logger.info_once("Using DeepGEMM backend for FP8 MoE")
+            logger.info_once("Using DeepGEMM backend for FP8 MoE", scope="local")
             return Fp8MoeBackend.DEEPGEMM
 
     # CUTLASS BlockScaled GroupedGemm on SM100 with block-quantized weights
@@ -173,7 +175,9 @@ def get_fp8_moe_backend(block_quant: bool) -> Fp8MoeBackend:
         and current_platform.is_device_capability(100)
         and block_quant
     ):
-        logger.info_once("Using Cutlass BlockScaled GroupedGemm backend for FP8 MoE")
+        logger.info_once(
+            "Using Cutlass BlockScaled GroupedGemm backend for FP8 MoE", scope="local"
+        )
         return Fp8MoeBackend.CUTLASS_BLOCK_SCALED_GROUPED_GEMM
 
     # default to Triton
Reference in New Issue
Block a user