[Feature] Add Hopper DeepGEMM E8M0 for DeepSeekV3.1 scale_fmt (#23666)
Signed-off-by: yewentao256 <zhyanwentao@126.com>
Signed-off-by: youkaichao <youkaichao@gmail.com>
Co-authored-by: youkaichao <youkaichao@gmail.com>
This commit is contained in:
@@ -20,7 +20,7 @@ from vllm.model_executor.layers.quantization.utils.w8a8_utils import (
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.triton_utils import tl, triton
|
||||
from vllm.utils import cdiv, direct_register_custom_op
|
||||
-from vllm.utils.deep_gemm import (is_blackwell_deep_gemm_e8m0_used,
|
||||
+from vllm.utils.deep_gemm import (is_deep_gemm_e8m0_used,
|
||||
should_use_deepgemm_for_fp8_linear)
|
||||
|
||||
logger = init_logger(__name__)
|
||||
@@ -385,7 +385,7 @@ def per_token_group_quant_fp8(
|
||||
scaling factor.
|
||||
"""
|
||||
if use_ue8m0 is None:
|
||||
-use_ue8m0 = is_blackwell_deep_gemm_e8m0_used()
|
||||
+use_ue8m0 = is_deep_gemm_e8m0_used()
|
||||
dtype = current_platform.fp8_dtype() if dtype is None else dtype
|
||||
assert (x.shape[-1] % group_size == 0), (
|
||||
f"the last dimension of `x` {x.shape[-1]} must be divisible "
|
||||
|
||||
Reference in New Issue
Block a user