[Bugfix] Check dimensions of multimodal embeddings in V1 (#15816)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -68,7 +68,7 @@ def run_blip2(questions: list[str], modality: str) -> ModelRequestData:
|
||||
# See https://huggingface.co/Salesforce/blip2-opt-2.7b/discussions/15#64ff02f3f8cf9e4f5b038262 #noqa
|
||||
prompts = [f"Question: {question} Answer:" for question in questions]
|
||||
engine_args = EngineArgs(
|
||||
model="Salesforce/blip2-opt-2.7b",
|
||||
model="Salesforce/blip2-opt-6.7b",
|
||||
disable_mm_preprocessor_cache=args.disable_mm_preprocessor_cache,
|
||||
)
|
||||
|
||||
@@ -128,7 +128,8 @@ def run_florence2(questions: list[str], modality: str) -> ModelRequestData:
|
||||
engine_args = EngineArgs(
|
||||
model="microsoft/Florence-2-large",
|
||||
tokenizer="facebook/bart-large",
|
||||
max_num_seqs=8,
|
||||
max_model_len=4096,
|
||||
max_num_seqs=2,
|
||||
trust_remote_code=True,
|
||||
dtype="bfloat16",
|
||||
disable_mm_preprocessor_cache=args.disable_mm_preprocessor_cache,
|
||||
@@ -511,7 +512,7 @@ def run_mllama(questions: list[str], modality: str) -> ModelRequestData:
|
||||
engine_args = EngineArgs(
|
||||
model=model_name,
|
||||
max_model_len=4096,
|
||||
max_num_seqs=16,
|
||||
max_num_seqs=2,
|
||||
disable_mm_preprocessor_cache=args.disable_mm_preprocessor_cache,
|
||||
)
|
||||
|
||||
@@ -700,7 +701,7 @@ def run_pixtral_hf(questions: list[str], modality: str) -> ModelRequestData:
|
||||
# NOTE: Need L40 (or equivalent) to avoid OOM
|
||||
engine_args = EngineArgs(
|
||||
model=model_name,
|
||||
max_model_len=8192,
|
||||
max_model_len=6144,
|
||||
max_num_seqs=2,
|
||||
disable_mm_preprocessor_cache=args.disable_mm_preprocessor_cache,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user