Tuned H100/H200 triton fp8 block configs for fused_qkv_a_proj (#23939)
Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
@@ -141,6 +141,7 @@ def get_weight_shapes(tp_size):
|
||||
# cannot TP
|
||||
total = [
|
||||
(512 + 64, 7168),
|
||||
(2112, 7168),
|
||||
((128 + 64) * 128, 7168),
|
||||
(128 * (128 + 128), 512),
|
||||
(7168, 16384),
|
||||
|
||||
Reference in New Issue
Block a user