[Model] Refactoring of MiniCPM-V and add MiniCPM-o-2.6 support for vLLM (#12069)
Signed-off-by: hzh <hezhihui_thu@163.com> Signed-off-by: Sungjae Lee <33976427+llsj14@users.noreply.github.com> Signed-off-by: shaochangxu.scx <shaochangxu.scx@antgroup.com> Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> Signed-off-by: NickLucche <nlucches@redhat.com> Signed-off-by: Isotr0py <2037008807@qq.com> Signed-off-by: Roger Wang <ywang@roblox.com> Signed-off-by: Rafael Vasquez <rafvasq21@gmail.com> Signed-off-by: Akshat Tripathi <akshat@krai.ai> Signed-off-by: Oleg Mosalov <oleg@krai.ai> Signed-off-by: Jee Jee Li <pandaleefree@gmail.com> Signed-off-by: rshaw@neuralmagic.com <rshaw@neuralmagic.com> Signed-off-by: Yida Wu <yidawu@alumni.cmu.edu> Signed-off-by: Chenguang Li <757486878@qq.com> Signed-off-by: youkaichao <youkaichao@gmail.com> Signed-off-by: Alex-Brooks <Alex.brooks@ibm.com> Signed-off-by: Chen Zhang <zhangch99@outlook.com> Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Signed-off-by: Shanshan Shen <467638484@qq.com> Signed-off-by: elijah <f1renze.142857@gmail.com> Signed-off-by: Yikun <yikunkero@gmail.com> Signed-off-by: mgoin <michael@neuralmagic.com> Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu> Signed-off-by: Konrad Zawora <kzawora@habana.ai> Signed-off-by: tjtanaa <tunjian.tan@embeddedllm.com> Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com> Signed-off-by: Rui Qiao <ruisearch42@gmail.com> Co-authored-by: Sungjae Lee <33976427+llsj14@users.noreply.github.com> Co-authored-by: shaochangxu <85155497+shaochangxu@users.noreply.github.com> Co-authored-by: shaochangxu.scx <shaochangxu.scx@antgroup.com> Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk> Co-authored-by: Nicolò Lucchesi <nlucches@redhat.com> Co-authored-by: sixgod <evethwillbeok@outlook.com> Co-authored-by: Isotr0py <2037008807@qq.com> Co-authored-by: Roger Wang <136131678+ywang96@users.noreply.github.com> Co-authored-by: Rafael Vasquez <rafvasq21@gmail.com> Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn> Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com> Co-authored-by: Akshat Tripathi <Akshat.tripathi6568@gmail.com> Co-authored-by: Oleg Mosalov <oleg@krai.ai> Co-authored-by: Jee Jee Li <pandaleefree@gmail.com> Co-authored-by: Avshalom Manevich <12231371+avshalomman@users.noreply.github.com> Co-authored-by: Robert Shaw <114415538+robertgshaw2-neuralmagic@users.noreply.github.com> Co-authored-by: Yangcheng Li <liyangcheng.lyc@alibaba-inc.com> Co-authored-by: Siyuan Li <94890248+liaoyanqing666@users.noreply.github.com> Co-authored-by: Concurrensee <yida.wu@amd.com> Co-authored-by: Chenguang Li <757486878@qq.com> Co-authored-by: youkaichao <youkaichao@gmail.com> Co-authored-by: Alex Brooks <alex.brooks@ibm.com> Co-authored-by: Chen Zhang <zhangch99@outlook.com> Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Co-authored-by: Shanshan Shen <467638484@qq.com> Co-authored-by: elijah <30852919+e1ijah1@users.noreply.github.com> Co-authored-by: Yikun Jiang <yikunkero@gmail.com> Co-authored-by: Steve Luo <36296769+SunflowerAries@users.noreply.github.com> Co-authored-by: mgoin <michael@neuralmagic.com> Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu> Co-authored-by: Konrad Zawora <kzawora@habana.ai> Co-authored-by: TJian <tunjian1996@gmail.com> Co-authored-by: tjtanaa <tunjian.tan@embeddedllm.com> Co-authored-by: wangxiyuan <wangxiyuan1007@gmail.com> Co-authored-by: maang-h <55082429+maang-h@users.noreply.github.com> Co-authored-by: Elfie Guo <164945471+elfiegg@users.noreply.github.com> Co-authored-by: Rui Qiao <161574667+ruisearch42@users.noreply.github.com> Co-authored-by: Roger Wang <ywang@roblox.com>
This commit is contained in:
@@ -265,8 +265,9 @@ def run_mantis(question: str, modality: str):
|
||||
|
||||
|
||||
# MiniCPM-V
|
||||
def run_minicpmv(question: str, modality: str):
|
||||
assert modality == "image"
|
||||
def run_minicpmv_base(question: str, modality: str, model_name):
|
||||
assert modality in ["image", "video"]
|
||||
# If you want to use `MiniCPM-o-2_6` with audio inputs, check `audio_language.py` # noqa
|
||||
|
||||
# 2.0
|
||||
# The official repo doesn't work yet, so we need to use a fork for now
|
||||
@@ -277,7 +278,15 @@ def run_minicpmv(question: str, modality: str):
|
||||
# model_name = "openbmb/MiniCPM-Llama3-V-2_5"
|
||||
|
||||
# 2.6
|
||||
model_name = "openbmb/MiniCPM-V-2_6"
|
||||
# model_name = "openbmb/MiniCPM-V-2_6"
|
||||
# o2.6
|
||||
|
||||
# modality supports
|
||||
# 2.0: image
|
||||
# 2.5: image
|
||||
# 2.6: image, video
|
||||
# o2.6: image, video, audio
|
||||
# model_name = "openbmb/MiniCPM-o-2_6"
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_name,
|
||||
trust_remote_code=True)
|
||||
llm = LLM(
|
||||
@@ -294,13 +303,18 @@ def run_minicpmv(question: str, modality: str):
|
||||
# 2.5
|
||||
# stop_token_ids = [tokenizer.eos_id, tokenizer.eot_id]
|
||||
|
||||
# 2.6
|
||||
# 2.6 / o2.6
|
||||
stop_tokens = ['<|im_end|>', '<|endoftext|>']
|
||||
stop_token_ids = [tokenizer.convert_tokens_to_ids(i) for i in stop_tokens]
|
||||
|
||||
modality_placeholder = {
|
||||
"image": "(<image>./</image>)",
|
||||
"video": "(<video>./</video>)",
|
||||
}
|
||||
|
||||
messages = [{
|
||||
'role': 'user',
|
||||
'content': f'(<image>./</image>)\n{question}'
|
||||
'content': f'{modality_placeholder[modality]}\n{question}'
|
||||
}]
|
||||
prompt = tokenizer.apply_chat_template(messages,
|
||||
tokenize=False,
|
||||
@@ -308,6 +322,14 @@ def run_minicpmv(question: str, modality: str):
|
||||
return llm, prompt, stop_token_ids
|
||||
|
||||
|
||||
def run_minicpmo(question: str, modality: str):
|
||||
return run_minicpmv_base(question, modality, "openbmb/MiniCPM-o-2_6")
|
||||
|
||||
|
||||
def run_minicpmv(question: str, modality: str):
|
||||
return run_minicpmv_base(question, modality, "openbmb/MiniCPM-V-2_6")
|
||||
|
||||
|
||||
# LLama 3.2
|
||||
def run_mllama(question: str, modality: str):
|
||||
assert modality == "image"
|
||||
@@ -523,6 +545,7 @@ model_example_map = {
|
||||
"llava-next-video": run_llava_next_video,
|
||||
"llava-onevision": run_llava_onevision,
|
||||
"mantis": run_mantis,
|
||||
"minicpmo": run_minicpmo,
|
||||
"minicpmv": run_minicpmv,
|
||||
"mllama": run_mllama,
|
||||
"molmo": run_molmo,
|
||||
|
||||
Reference in New Issue
Block a user