[Perf] Create TMA-aligned input scale tensor for DeepGemm on Hopper (#32619)
Signed-off-by: Xin Yang <xyangx@amazon.com>
This commit is contained in:
@@ -8,13 +8,16 @@ import torch

 from vllm.model_executor.layers.quantization.utils import fp8_utils, int8_utils


-@pytest.mark.parametrize("shape", [(32, 128), (64, 256), (16, 512)])
+@pytest.mark.parametrize(
+    "shape", [(31, 128), (32, 128), (63, 256), (64, 256), (16, 512)]
+)
 @pytest.mark.parametrize("column_major", [False, True])
+@pytest.mark.parametrize("tma_aligned", [False, True])
 @pytest.mark.parametrize("scale_ue8m0", [False, True])
 @pytest.mark.parametrize("group_size", [64, 128])
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
 def test_per_token_group_quant_fp8(
-    shape, column_major: bool, scale_ue8m0: bool, group_size: int
+    shape, column_major: bool, tma_aligned: bool, scale_ue8m0: bool, group_size: int
 ):
     device = "cuda"
||||
@@ -28,6 +31,7 @@ def test_per_token_group_quant_fp8(
         x,
         group_size,
         column_major_scales=column_major,
+        tma_aligned_scales=tma_aligned,
         use_ue8m0=scale_ue8m0,
     )

Reference in New Issue
Block a user