dynamic dispatch of fp8 kernels (#14245)

Signed-off-by: Jeff Daily <jeff.daily@amd.com>
2025-03-11 07:54:56 -07:00
parent 08a1a1121d
commit a1c8f3796c
25 changed files with 292 additions and 159 deletions
--- a/tests/kernels/quant_utils.py
+++ b/tests/kernels/quant_utils.py
@@ -9,8 +9,7 @@ from vllm.platforms import current_platform
 # Using the default value (240.0) from pytorch will cause accuracy
 # issue on dynamic quantization models. Here use 224.0 for rocm.
 ROCM_FP8_MAX = 224.0
-FP8_DTYPE = torch.float8_e4m3fnuz if current_platform.is_rocm() \
-                else torch.float8_e4m3fn
+FP8_DTYPE = current_platform.fp8_dtype()


 def as_float32_tensor(x: Union[float, torch.tensor]) -> torch.tensor: