[GLM-OCR] GLM-OCR with MTP Support (#33005)
Signed-off-by: zRzRzRzRzRzRzR <2448370773@qq.com> Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn> Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
@@ -458,6 +458,20 @@ VLM_TEST_SETTINGS = {
|
||||
],
|
||||
marks=[large_gpu_mark(min_gb=32)],
|
||||
),
|
||||
"glm_ocr": VLMTestInfo(
|
||||
models=["zai-org/GLM-OCR"],
|
||||
test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
|
||||
prompt_formatter=lambda img_prompt: f"[gMASK]<|user|>\n{img_prompt}<|assistant|>\n", # noqa: E501
|
||||
img_idx_to_prompt=lambda idx: "<|begin_of_image|><|image|><|end_of_image|>",
|
||||
video_idx_to_prompt=lambda idx: "<|begin_of_video|><|video|><|end_of_video|>",
|
||||
max_model_len=2048,
|
||||
max_num_seqs=2,
|
||||
get_stop_token_ids=lambda tok: [151329, 151336, 151338],
|
||||
num_logprobs=10,
|
||||
image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)],
|
||||
auto_cls=AutoModelForImageTextToText,
|
||||
marks=[large_gpu_mark(min_gb=32)],
|
||||
),
|
||||
"h2ovl": VLMTestInfo(
|
||||
models=[
|
||||
"h2oai/h2ovl-mississippi-800m",
|
||||
|
||||
@@ -91,6 +91,19 @@ MODEL_CONFIGS: dict[str, dict[str, Any]] = {
|
||||
"use_processor": True,
|
||||
"question": "What is the content of each image?",
|
||||
},
|
||||
"glm_ocr": {
|
||||
"model_name": "zai-org/GLM-OCR",
|
||||
"interface": "llm_generate",
|
||||
"max_model_len": 131072,
|
||||
"max_num_seqs": 2,
|
||||
"sampling_params": {
|
||||
"temperature": 0.0,
|
||||
"max_tokens": 256,
|
||||
"stop_token_ids": None,
|
||||
},
|
||||
"use_processor": True,
|
||||
"question": "Text Recognition:",
|
||||
},
|
||||
"keye_vl": {
|
||||
"model_name": "Kwai-Keye/Keye-VL-8B-Preview",
|
||||
"interface": "llm_generate",
|
||||
|
||||
Reference in New Issue
Block a user