diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md
index 0551d4670..e2d505ade 100644
--- a/docs/models/supported_models.md
+++ b/docs/models/supported_models.md
@@ -682,7 +682,7 @@ These models primarily accept the [`LLM.generate`](./generative_models.md#llmgen
 | `Blip2ForConditionalGeneration` | BLIP-2 | T + IE | `Salesforce/blip2-opt-2.7b`, `Salesforce/blip2-opt-6.7b`, etc. | ✅︎ | ✅︎ |
 | `ChameleonForConditionalGeneration` | Chameleon | T + I | `facebook/chameleon-7b`, etc. | | ✅︎ |
 | `Cohere2VisionForConditionalGeneration` | Command A Vision | T + I+ | `CohereLabs/command-a-vision-07-2025`, etc. | | ✅︎ |
-| `DeepseekVLV2ForCausalLM`^ | DeepSeek-VL2 | T + I+ | `deepseek-ai/deepseek-vl2-tiny`, `deepseek-ai/deepseek-vl2-small`, `deepseek-ai/deepseek-vl2`, etc. | | ✅︎ |
+| `DeepseekVLV2ForCausalLM` | DeepSeek-VL2 | T + I+ | `deepseek-ai/deepseek-vl2-tiny`, `deepseek-ai/deepseek-vl2-small`, `deepseek-ai/deepseek-vl2`, etc. | | ✅︎ |
 | `DeepseekOCRForCausalLM` | DeepSeek-OCR | T + I+ | `deepseek-ai/DeepSeek-OCR`, etc. | ✅︎ | ✅︎ |
 | `DeepseekOCR2ForCausalLM` | DeepSeek-OCR-2 | T + I+ | `deepseek-ai/DeepSeek-OCR-2`, etc. | ✅︎ | ✅︎ |
 | `Eagle2_5_VLForConditionalGeneration` | Eagle2.5-VL | T + IE+ | `nvidia/Eagle2.5-8B`, etc. | ✅︎ | ✅︎ |
@@ -762,10 +762,8 @@ Some models are supported only via the [Transformers modeling backend](#transfor
 |--------------|--------|--------|-------------------|-----------------------------|-----------------------------------------|
 | `Emu3ForConditionalGeneration` | Emu3 | T + I | `BAAI/Emu3-Chat-hf` | ✅︎ | ✅︎ |
 
-^ You need to set the architecture name via `--hf-overrides` to match the one in vLLM.
-    • For example, to use DeepSeek-VL2 series models:
-      `--hf-overrides '{"architectures": ["DeepseekVLV2ForCausalLM"]}'`
-E Pre-computed embeddings can be inputted for this modality.
+^ You need to set the architecture name via `--hf-overrides` to match the one in vLLM.
+E Pre-computed embeddings can be inputted for this modality.
+ Multiple items can be inputted per text prompt for this modality.
 
 !!! note
diff --git a/tests/models/registry.py b/tests/models/registry.py
index d139f707f..fe500254b 100644
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -715,7 +715,6 @@ _MULTIMODAL_EXAMPLE_MODELS = {
         extras={"fork": "Isotr0py/deepseek-vl2-tiny"},
         max_transformers_version="4.48",
         transformers_version_reason={"hf": "HF model is not compatible."},
-        hf_overrides={"architectures": ["DeepseekVLV2ForCausalLM"]},
     ),
     "DeepseekOCRForCausalLM": _HfExamplesInfo(
         "deepseek-ai/DeepSeek-OCR",
diff --git a/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh b/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh
index 58ae42126..673236625 100755
--- a/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh
+++ b/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh
@@ -95,18 +95,6 @@ cleanup_instances() {
   sleep 2
 }
 
-# Handle to get model-specific arguments for deepseek
-get_model_args() {
-  local model_name=$1
-  local extra_args=""
-
-  if [[ "$model_name" == "deepseek-ai/deepseek-vl2-tiny" ]]; then
-    extra_args="--hf_overrides '{\"architectures\": [\"DeepseekVLV2ForCausalLM\"]}' --trust-remote-code"
-  fi
-
-  echo "$extra_args"
-}
-
 get_num_gpus() {
   if [[ "$SMI_BIN" == *"nvidia"* ]]; then
     $SMI_BIN --query-gpu=name --format=csv,noheader | wc -l
@@ -127,9 +115,6 @@ run_tests_for_model() {
   echo "Testing model: $model_name"
   echo "================================"
 
-  # Get model-specific arguments
-  local model_args=$(get_model_args "$model_name")
-
   # Arrays to store all hosts and ports
   PREFILL_HOSTS=()
   PREFILL_PORTS=()
@@ -172,11 +157,7 @@ run_tests_for_model() {
       BASE_CMD="${BASE_CMD} --attention-backend=$ATTENTION_BACKEND"
     fi
 
-    if [ -n "$model_args" ]; then
-      FULL_CMD="$BASE_CMD $model_args"
-    else
       FULL_CMD="$BASE_CMD"
-    fi
 
     eval "$FULL_CMD &"
@@ -227,11 +208,7 @@ run_tests_for_model() {
       --tensor-parallel-size 1 --enable-expert-parallel"
     fi
 
-    if [ -n "$model_args" ]; then
-      FULL_CMD="$BASE_CMD $model_args"
-    else
       FULL_CMD="$BASE_CMD"
-    fi
 
     eval "$FULL_CMD &"
diff --git a/tests/v1/kv_connector/nixl_integration/run_edge_case_test.sh b/tests/v1/kv_connector/nixl_integration/run_edge_case_test.sh
index 23b2a0b1c..703a27fd3 100755
--- a/tests/v1/kv_connector/nixl_integration/run_edge_case_test.sh
+++ b/tests/v1/kv_connector/nixl_integration/run_edge_case_test.sh
@@ -55,19 +55,6 @@ cleanup_instances() {
   sleep 2
 }
 
-# Handle to get model-specific arguments for deepseek
-get_model_args() {
-  local model_name=$1
-  local extra_args=""
-
-  if [[ "$model_name" == "deepseek-ai/deepseek-vl2-tiny" ]]; then
-    extra_args="--hf_overrides '{\"architectures\": [\"DeepseekVLV2ForCausalLM\"]}' --trust-remote-code"
-  fi
-
-  echo "$extra_args"
-}
-
-
 # Function to run tests for a specific model
 run_tests_for_model() {
   local model_name=$1
@@ -75,9 +62,6 @@ run_tests_for_model() {
   echo "Testing model: $model_name"
   echo "================================"
 
-  # Get model-specific arguments
-  local model_args=$(get_model_args "$model_name")
-
   # Start prefill instance
   PREFILL_PORT=8001
@@ -87,11 +71,7 @@ run_tests_for_model() {
     --gpu-memory-utilization 0.2 \
     --kv-transfer-config '$KV_CONFIG'"
 
-  if [ -n "$model_args" ]; then
-    FULL_CMD="$BASE_CMD $model_args"
-  else
     FULL_CMD="$BASE_CMD"
-  fi
 
   eval "$FULL_CMD &"
@@ -105,11 +85,7 @@ run_tests_for_model() {
     --gpu-memory-utilization 0.2 \
     --kv-transfer-config '$KV_CONFIG'"
 
-  if [ -n "$model_args" ]; then
-    FULL_CMD="$BASE_CMD $model_args"
-  else
     FULL_CMD="$BASE_CMD"
-  fi
 
   eval "$FULL_CMD &"
diff --git a/vllm/transformers_utils/configs/deepseek_vl2.py b/vllm/transformers_utils/configs/deepseek_vl2.py
index 05067c04c..822e8cdd0 100644
--- a/vllm/transformers_utils/configs/deepseek_vl2.py
+++ b/vllm/transformers_utils/configs/deepseek_vl2.py
@@ -89,6 +89,7 @@ class MlpProjectorConfig(PretrainedConfig):
 
 class DeepseekVLV2Config(PretrainedConfig):
     model_type = "deepseek_vl_v2"
+    architectures: list[str] | None = None
 
     vision_config: VisionEncoderConfig
     projector_config: MlpProjectorConfig
@@ -105,6 +106,9 @@ class DeepseekVLV2Config(PretrainedConfig):
     ):
         super().__init__(**kwargs)
 
+        if self.architectures is None:
+            self.architectures = ["DeepseekVLV2ForCausalLM"]
+
         vision_config = kwargs.get("vision_config", {})
         self.vision_config = VisionEncoderConfig(**vision_config)
@@ -120,8 +124,7 @@ class DeepseekVLV2Config(PretrainedConfig):
         self.vocab_size = self.text_config.vocab_size
 
         # update model_type for OCR models
-        architectures = self.architectures or kwargs.get("architectures", [])
-        if "DeepseekOCRForCausalLM" in architectures:
+        if "DeepseekOCRForCausalLM" in self.architectures:
             self.model_type = "deepseek_ocr"
-        elif "DeepseekOCR2ForCausalLM" in architectures:
+        elif "DeepseekOCR2ForCausalLM" in self.architectures:
             self.model_type = "deepseek_ocr2"