[VLM] Merged multi-modal processor for GLM4V (#12449)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
Jee Jee Li
2025-02-09 04:32:16 +08:00
committed by GitHub
parent fe743b798d
commit 86222a3dab
4 changed files with 237 additions and 182 deletions

View File

@@ -106,7 +106,9 @@ def run_glm4v(question: str, modality: str):
trust_remote_code=True,
enforce_eager=True,
disable_mm_preprocessor_cache=args.disable_mm_preprocessor_cache)
- prompt = question
+ prompt = f"<|user|>\n<|begin_of_image|><|endoftext|><|end_of_image|>\
+ {question}<|assistant|>"
stop_token_ids = [151329, 151336, 151338]
return llm, prompt, stop_token_ids