Tuned H100/H200 triton fp8 block configs for fused_qkv_a_proj (#23939)

Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
Michael Goin
2025-08-29 13:28:35 -04:00
committed by GitHub
parent 4d7fe40fc0
commit b7adf94c4a
4 changed files with 271 additions and 3 deletions

View File

@@ -141,6 +141,7 @@ def get_weight_shapes(tp_size):
# cannot TP
total = [
(512 + 64, 7168),
(2112, 7168),
((128 + 64) * 128, 7168),
(128 * (128 + 128), 512),
(7168, 16384),