[V1] Support any head size for FlexAttention backend (#20467)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-07-07 00:54:36 +08:00
committed by GitHub
parent e202dd2736
commit 9fb52e523a
20 changed files with 202 additions and 118 deletions

View File

@@ -98,7 +98,7 @@ def run_blip2(questions: list[str], modality: str) -> ModelRequestData:
# See https://huggingface.co/Salesforce/blip2-opt-2.7b/discussions/15#64ff02f3f8cf9e4f5b038262 #noqa
prompts = [f"Question: {question} Answer:" for question in questions]
engine_args = EngineArgs(
-model="Salesforce/blip2-opt-6.7b",
+model="Salesforce/blip2-opt-2.7b",
limit_mm_per_prompt={modality: 1},
)
@@ -971,7 +971,7 @@ def run_pixtral_hf(questions: list[str], modality: str) -> ModelRequestData:
)
-# Qwen
+# Qwen-VL
def run_qwen_vl(questions: list[str], modality: str) -> ModelRequestData:
assert modality == "image"