diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md
index 0328fe522..23cf7f66b 100644
--- a/docs/models/supported_models.md
+++ b/docs/models/supported_models.md
@@ -677,7 +677,7 @@ These models primarily accept the [`LLM.generate`](./generative_models.md#llmgen
 | `ChameleonForConditionalGeneration` | Chameleon | T + I | `facebook/chameleon-7b`, etc. | | ✅︎ |
 | `Cohere2VisionForConditionalGeneration` | Command A Vision | T + I+ | `CohereLabs/command-a-vision-07-2025`, etc. | | ✅︎ |
 | `DeepseekVLV2ForCausalLM`^ | DeepSeek-VL2 | T + I+ | `deepseek-ai/deepseek-vl2-tiny`, `deepseek-ai/deepseek-vl2-small`, `deepseek-ai/deepseek-vl2`, etc. | | ✅︎ |
-| `DeepseekOCRForCausalLM` | DeepSeek-OCR | T + I+ | `deepseek-ai/DeepSeek-OCR`, etc. | | ✅︎ |
+| `DeepseekOCRForCausalLM` | DeepSeek-OCR | T + I+ | `deepseek-ai/DeepSeek-OCR`, etc. | ✅︎ | ✅︎ |
 | `Ernie4_5_VLMoeForConditionalGeneration` | Ernie4.5-VL | T + I+/ V+ | `baidu/ERNIE-4.5-VL-28B-A3B-PT`, `baidu/ERNIE-4.5-VL-424B-A47B-PT` | | ✅︎ |
 | `FuyuForCausalLM` | Fuyu | T + I | `adept/fuyu-8b`, etc. | | ✅︎ |
 | `Gemma3ForConditionalGeneration` | Gemma 3 | T + IE+ | `google/gemma-3-4b-it`, `google/gemma-3-27b-it`, etc. | ✅︎ | ✅︎ |
diff --git a/vllm/model_executor/models/deepseek_ocr.py b/vllm/model_executor/models/deepseek_ocr.py
index 1f07381c0..146c673dd 100644
--- a/vllm/model_executor/models/deepseek_ocr.py
+++ b/vllm/model_executor/models/deepseek_ocr.py
@@ -14,9 +14,11 @@ from vllm.config import VllmConfig
 from vllm.config.multimodal import BaseDummyOptions
 from vllm.model_executor.models.interfaces import (
     MultiModalEmbeddings,
+    SupportsLoRA,
     SupportsMultiModal,
     SupportsPP,
 )
+from vllm.model_executor.models.module_mapping import MultiModelKeys
 from vllm.model_executor.models.utils import (
     AutoWeightsLoader,
     WeightsMapper,
@@ -343,7 +345,7 @@ class DeepseekOCRMultiModalProcessor(
     info=DeepseekOCRProcessingInfo,
     dummy_inputs=DeepseekOCRDummyInputsBuilder,
 )
-class DeepseekOCRForCausalLM(nn.Module, SupportsMultiModal, SupportsPP):
+class DeepseekOCRForCausalLM(nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA):
     hf_to_vllm_mapper = WeightsMapper(
         orig_to_new_prefix={
             # map prefix for language backbone
@@ -589,3 +591,13 @@ class DeepseekOCRForCausalLM(nn.Module, SupportsMultiModal, SupportsPP):
         loader = AutoWeightsLoader(self)
         autoloaded_weights = loader.load_weights(weights, mapper=self.hf_to_vllm_mapper)
         return autoloaded_weights
+
+    def get_mm_mapping(self) -> MultiModelKeys:
+        """
+        Get the module prefix in multimodal models
+        """
+        return MultiModelKeys.from_string_field(
+            language_model="language_model",
+            connector="projector",
+            tower_model=["sam_model", "vision_model"],
+        )