Fix GLM-4.5V-FP8 numerical issue (#22949)
Signed-off-by: qizixi <qizixi@meta.com> Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -1064,6 +1064,76 @@ def load_tarsier2(question: str, image_urls: list[str]) -> ModelRequestData:
|
||||
)
|
||||
|
||||
|
||||
# GLM-4.5V
|
||||
def load_glm4_5v(question: str, image_urls: list[str]) -> ModelRequestData:
|
||||
model_name = "zai-org/GLM-4.5V"
|
||||
|
||||
engine_args = EngineArgs(
|
||||
model=model_name,
|
||||
max_model_len=32768,
|
||||
max_num_seqs=2,
|
||||
limit_mm_per_prompt={"image": len(image_urls)},
|
||||
enforce_eager=True,
|
||||
tensor_parallel_size=4,
|
||||
)
|
||||
placeholders = [{"type": "image", "image": url} for url in image_urls]
|
||||
messages = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
*placeholders,
|
||||
{"type": "text", "text": question},
|
||||
],
|
||||
}
|
||||
]
|
||||
processor = AutoProcessor.from_pretrained(model_name)
|
||||
prompt = processor.apply_chat_template(
|
||||
messages, tokenize=False, add_generation_prompt=True
|
||||
)
|
||||
image_data = [fetch_image(url) for url in image_urls]
|
||||
|
||||
return ModelRequestData(
|
||||
engine_args=engine_args,
|
||||
prompt=prompt,
|
||||
image_data=image_data,
|
||||
)
|
||||
|
||||
|
||||
# GLM-4.5V-FP8
|
||||
def load_glm4_5v_fp8(question: str, image_urls: list[str]) -> ModelRequestData:
|
||||
model_name = "zai-org/GLM-4.5V-FP8"
|
||||
|
||||
engine_args = EngineArgs(
|
||||
model=model_name,
|
||||
max_model_len=32768,
|
||||
max_num_seqs=2,
|
||||
limit_mm_per_prompt={"image": len(image_urls)},
|
||||
enforce_eager=True,
|
||||
tensor_parallel_size=4,
|
||||
)
|
||||
placeholders = [{"type": "image", "image": url} for url in image_urls]
|
||||
messages = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
*placeholders,
|
||||
{"type": "text", "text": question},
|
||||
],
|
||||
}
|
||||
]
|
||||
processor = AutoProcessor.from_pretrained(model_name)
|
||||
prompt = processor.apply_chat_template(
|
||||
messages, tokenize=False, add_generation_prompt=True
|
||||
)
|
||||
image_data = [fetch_image(url) for url in image_urls]
|
||||
|
||||
return ModelRequestData(
|
||||
engine_args=engine_args,
|
||||
prompt=prompt,
|
||||
image_data=image_data,
|
||||
)
|
||||
|
||||
|
||||
model_example_map = {
|
||||
"aria": load_aria,
|
||||
"aya_vision": load_aya_vision,
|
||||
@@ -1096,6 +1166,8 @@ model_example_map = {
|
||||
"step3": load_step3,
|
||||
"tarsier": load_tarsier,
|
||||
"tarsier2": load_tarsier2,
|
||||
"glm4_5v": load_glm4_5v,
|
||||
"glm4_5v_fp8": load_glm4_5v_fp8,
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user