[V1] Support any head size for FlexAttention backend (#20467)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-07-07 00:54:36 +08:00
committed by GitHub
parent e202dd2736
commit 9fb52e523a
20 changed files with 202 additions and 118 deletions

View File

@@ -83,7 +83,7 @@ MODELS = [
QWEN2_CONFIG,
PHI3_CONFIG,
GPT2_CONFIG,
# STABLELM_CONFIG, # enable this when v1 support head_size=80
STABLELM_CONFIG,
DOLPHIN_CONFIG,
# STARCODER_CONFIG, # broken
]