Tuned H100/H200 triton fp8 block configs for fused_qkv_a_proj (#23939)

Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
Michael Goin
2025-08-29 13:28:35 -04:00
committed by GitHub
parent 4d7fe40fc0
commit b7adf94c4a
4 changed files with 271 additions and 3 deletions

View File

@@ -16,6 +16,7 @@ assert current_platform.is_cuda(), (
# DeepSeek-V3 weight shapes
DEEPSEEK_V3_SHAPES = [
(512 + 64, 7168),
(2112, 7168),
((128 + 64) * 128, 7168),
(128 * (128 + 128), 512),
(7168, 16384),