[Refactor] Use DeepGEMM Col Major TMA Aligned Tensor (#25517)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
Author: Wentao Ye
Date: 2025-09-24 18:52:36 -04:00
Committed by: GitHub
Parent: 6160ba4151
Commit: 1f29141258
6 changed files with 34 additions and 78 deletions


@@ -8,12 +8,16 @@ import torch

 from vllm import _custom_ops as ops
 from vllm.model_executor.layers.quantization.utils.fp8_utils import (
-    get_col_major_tma_aligned_tensor,
     per_token_group_quant_fp8,
     w8a8_triton_block_scaled_mm,
 )
 from vllm.triton_utils import triton
-from vllm.utils.deep_gemm import calc_diff, fp8_gemm_nt, per_block_cast_to_fp8
+from vllm.utils.deep_gemm import (
+    calc_diff,
+    fp8_gemm_nt,
+    get_col_major_tma_aligned_tensor,
+    per_block_cast_to_fp8,
+)


 def benchmark_shape(m: int,
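
For context, a minimal sketch of how the relocated helper is typically wired into such a benchmark after this change. The shapes, the group size, and the (tensor, scales) return convention of the quantization helpers are assumptions for illustration, not taken from this commit:

    import torch

    from vllm.model_executor.layers.quantization.utils.fp8_utils import (
        per_token_group_quant_fp8,
    )
    from vllm.utils.deep_gemm import (
        fp8_gemm_nt,
        get_col_major_tma_aligned_tensor,  # new import location after this commit
        per_block_cast_to_fp8,
    )

    # Illustrative shapes and group size only; not taken from the diff.
    m, n, k, block = 128, 4096, 7168, 128

    A = torch.randn(m, k, device="cuda", dtype=torch.bfloat16)
    B = torch.randn(n, k, device="cuda", dtype=torch.bfloat16)

    # Per-token-group quantization for the activation, per-block for the
    # weight (assumed (tensor, scales) return convention for both helpers).
    A_fp8, A_scale = per_token_group_quant_fp8(A, block)
    B_fp8, B_scale = per_block_cast_to_fp8(B)

    # DeepGEMM expects the activation scales in a column-major, TMA-aligned
    # layout; this is the helper the commit re-exports from
    # vllm.utils.deep_gemm instead of fp8_utils.
    A_scale = get_col_major_tma_aligned_tensor(A_scale)

    out = torch.empty(m, n, device="cuda", dtype=torch.bfloat16)
    # Assumed DeepGEMM-style call convention: (tensor, scales) pairs plus an
    # output buffer.
    fp8_gemm_nt((A_fp8, A_scale), (B_fp8, B_scale), out)

The net effect of the refactor is import-path only: callers that previously reached into fp8_utils for the TMA-alignment helper now take it from the DeepGEMM utility module alongside the other DeepGEMM entry points.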