[Feature] Integrate new deepgemm (#19820)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
Wentao Ye
2025-06-24 15:51:56 -04:00
committed by GitHub
parent 91f7d9d0b6
commit c6e3bba8e6
8 changed files with 230 additions and 264 deletions

View File

@@ -86,6 +86,9 @@ def benchmark_config(
(num_experts, 2 * shard_intermediate_size), dtype=torch.float32
)
w2_scale = torch.randn((hidden_size, num_experts), dtype=torch.float32)
if use_deep_gemm:
# we use the default block shape for deepgemm
block_quant_shape = [128, 128]
if use_fp8_w8a8:
if block_quant_shape:
block_n, block_k = block_quant_shape[0], block_quant_shape[1]