diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md index 7f20d2052..d30518da2 100644 --- a/docs/models/supported_models.md +++ b/docs/models/supported_models.md @@ -728,6 +728,8 @@ These models primarily accept the [`LLM.generate`](./generative_models.md#llmgen | `OpenPanguVLForConditionalGeneration` | openpangu-VL | T + IE+ + VE+ |`FreedomIntelligence/openPangu-VL-7B` | ✅︎ | ✅︎ | | `Ovis` | Ovis2, Ovis1.6 | T + I+ | `AIDC-AI/Ovis2-1B`, `AIDC-AI/Ovis1.6-Llama3.2-3B`, etc. | | ✅︎ | | `Ovis2_5` | Ovis2.5 | T + I+ + V | `AIDC-AI/Ovis2.5-9B`, etc. | | | +| `Ovis2_6ForCausalLM` | Ovis2.6 | T + I+ + V | `AIDC-AI/Ovis2.6-2B`, etc. | | | +| `Ovis2_6_MoeForCausalLM` | Ovis2.6 | T + I+ + V | `AIDC-AI/Ovis2.6-30B-A3B`, etc. | | | | `PaddleOCRVLForConditionalGeneration` | Paddle-OCR | T + I+ | `PaddlePaddle/PaddleOCR-VL`, etc. | | | | `PaliGemmaForConditionalGeneration` | PaliGemma, PaliGemma 2 | T + IE | `google/paligemma-3b-pt-224`, `google/paligemma-3b-mix-224`, `google/paligemma2-3b-ft-docci-448`, etc. | ✅︎ | ✅︎ | | `Phi3VForCausalLM` | Phi-3-Vision, Phi-3.5-Vision | T + IE+ | `microsoft/Phi-3-vision-128k-instruct`, `microsoft/Phi-3.5-vision-instruct`, etc. 
| | ✅︎ | diff --git a/tests/models/registry.py b/tests/models/registry.py index dcd1fa8ed..78d478020 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -915,6 +915,12 @@ _MULTIMODAL_EXAMPLE_MODELS = { }, ), "Ovis2_5": _HfExamplesInfo("AIDC-AI/Ovis2.5-2B", trust_remote_code=True), + "Ovis2_6ForCausalLM": _HfExamplesInfo( + "AIDC-AI/Ovis2.6-2B", is_available_online=False, trust_remote_code=True + ), + "Ovis2_6_MoeForCausalLM": _HfExamplesInfo( + "AIDC-AI/Ovis2.6-30B-A3B", trust_remote_code=True + ), "PaddleOCRVLForConditionalGeneration": _HfExamplesInfo( "PaddlePaddle/PaddleOCR-VL", trust_remote_code=True, diff --git a/vllm/model_executor/models/ovis2_5.py b/vllm/model_executor/models/ovis2_5.py index 8d038d4ad..00418d707 100644 --- a/vllm/model_executor/models/ovis2_5.py +++ b/vllm/model_executor/models/ovis2_5.py @@ -42,21 +42,12 @@ from vllm.utils.tensor_schema import TensorSchema, TensorShape from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP IMAGE_TOKEN = "<image>" +IMAGE_PLACEHOLDER_ID = 151669 VIDEO_TOKEN = "