[AMD][QWEN3-NEXT] FP8 Tunings (#32042)

Signed-off-by: Lifan Shen <lifans@meta.com>
This commit is contained in:
Lifan Shen
2026-01-27 01:34:13 -08:00
committed by GitHub
parent 58996f3589
commit da8d0c441a
5 changed files with 586 additions and 2 deletions

View File

@@ -22,8 +22,8 @@ from vllm.utils.argparse_utils import FlexibleArgumentParser
 mp.set_start_method("spawn", force=True)
-assert current_platform.is_cuda(), (
-    "Only support tune w8a8 block fp8 kernel on CUDA device."
+assert current_platform.is_cuda() or current_platform.is_rocm(), (
+    "Only support tune w8a8 block fp8 kernel on CUDA/ROCm device."
 )
 DTYPE_MAP = {