[Refactor] Use DeepGEMM Col Major TMA Aligned Tensor (#25517)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
Author: Wentao Ye
Date: 2025-09-24 18:52:36 -04:00
Committed by: GitHub
Parent: 6160ba4151
Commit: 1f29141258
6 changed files with 34 additions and 78 deletions


@@ -8,12 +8,16 @@ import torch

 from vllm import _custom_ops as ops
 from vllm.model_executor.layers.quantization.utils.fp8_utils import (
-    get_col_major_tma_aligned_tensor,
     per_token_group_quant_fp8,
     w8a8_triton_block_scaled_mm,
 )
 from vllm.triton_utils import triton
-from vllm.utils.deep_gemm import calc_diff, fp8_gemm_nt, per_block_cast_to_fp8
+from vllm.utils.deep_gemm import (
+    calc_diff,
+    fp8_gemm_nt,
+    get_col_major_tma_aligned_tensor,
+    per_block_cast_to_fp8,
+)


 def benchmark_shape(m: int,
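
For context, a minimal sketch of how the relocated helper is typically wired into such a benchmark after this change. The shapes, the group size, and the (tensor, scales) return convention of the quantization helpers are assumptions for illustration, not taken from this commit:

    import torch

    from vllm.model_executor.layers.quantization.utils.fp8_utils import (
        per_token_group_quant_fp8,
    )
    from vllm.utils.deep_gemm import (
        fp8_gemm_nt,
        get_col_major_tma_aligned_tensor,  # new import location after this commit
        per_block_cast_to_fp8,
    )

    # Illustrative shapes and group size only; not taken from the diff.
    m, n, k, block = 128, 4096, 7168, 128

    A = torch.randn(m, k, device="cuda", dtype=torch.bfloat16)
    B = torch.randn(n, k, device="cuda", dtype=torch.bfloat16)

    # Per-token-group quantization for the activation, per-block for the
    # weight (assumed (tensor, scales) return convention for both helpers).
    A_fp8, A_scale = per_token_group_quant_fp8(A, block)
    B_fp8, B_scale = per_block_cast_to_fp8(B)

    # DeepGEMM expects the activation scales in a column-major, TMA-aligned
    # layout; this is the helper the commit re-exports from
    # vllm.utils.deep_gemm instead of fp8_utils.
    A_scale = get_col_major_tma_aligned_tensor(A_scale)

    out = torch.empty(m, n, device="cuda", dtype=torch.bfloat16)
    # Assumed DeepGEMM-style call convention: (tensor, scales) pairs plus an
    # output buffer.
    fp8_gemm_nt((A_fp8, A_scale), (B_fp8, B_scale), out)

The net effect of the refactor is import-path only: callers that previously reached into fp8_utils for the TMA-alignment helper now take it from the DeepGEMM utility module alongside the other DeepGEMM entry points.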