[AMD][QWEN3-NEXT] FP8 Tunings (#32042)
Signed-off-by: Lifan Shen <lifans@meta.com>
This commit is contained in:
@@ -22,8 +22,8 @@ from vllm.utils.argparse_utils import FlexibleArgumentParser
|
||||
|
||||
mp.set_start_method("spawn", force=True)
|
||||
|
||||
-assert current_platform.is_cuda(), (
-    "Only support tune w8a8 block fp8 kernel on CUDA device."
+assert current_platform.is_cuda() or current_platform.is_rocm(), (
+    "Only support tune w8a8 block fp8 kernel on CUDA/ROCm device."
 )
|
||||
|
||||
DTYPE_MAP = {
|
||||
|
||||
Reference in New Issue
Block a user