diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md
index 0551d4670..e2d505ade 100644
--- a/docs/models/supported_models.md
+++ b/docs/models/supported_models.md
@@ -682,7 +682,7 @@ These models primarily accept the [`LLM.generate`](./generative_models.md#llmgen
| `Blip2ForConditionalGeneration` | BLIP-2 | T + IE | `Salesforce/blip2-opt-2.7b`, `Salesforce/blip2-opt-6.7b`, etc. | ✅︎ | ✅︎ |
| `ChameleonForConditionalGeneration` | Chameleon | T + I | `facebook/chameleon-7b`, etc. | | ✅︎ |
| `Cohere2VisionForConditionalGeneration` | Command A Vision | T + I+ | `CohereLabs/command-a-vision-07-2025`, etc. | | ✅︎ |
-| `DeepseekVLV2ForCausalLM`^ | DeepSeek-VL2 | T + I+ | `deepseek-ai/deepseek-vl2-tiny`, `deepseek-ai/deepseek-vl2-small`, `deepseek-ai/deepseek-vl2`, etc. | | ✅︎ |
+| `DeepseekVLV2ForCausalLM` | DeepSeek-VL2 | T + I+ | `deepseek-ai/deepseek-vl2-tiny`, `deepseek-ai/deepseek-vl2-small`, `deepseek-ai/deepseek-vl2`, etc. | | ✅︎ |
| `DeepseekOCRForCausalLM` | DeepSeek-OCR | T + I+ | `deepseek-ai/DeepSeek-OCR`, etc. | ✅︎ | ✅︎ |
| `DeepseekOCR2ForCausalLM` | DeepSeek-OCR-2 | T + I+ | `deepseek-ai/DeepSeek-OCR-2`, etc. | ✅︎ | ✅︎ |
| `Eagle2_5_VLForConditionalGeneration` | Eagle2.5-VL | T + IE+ | `nvidia/Eagle2.5-8B`, etc. | ✅︎ | ✅︎ |
@@ -762,10 +762,8 @@ Some models are supported only via the [Transformers modeling backend](#transfor
|--------------|--------|--------|-------------------|-----------------------------|-----------------------------------------|
| `Emu3ForConditionalGeneration` | Emu3 | T + I | `BAAI/Emu3-Chat-hf` | ✅︎ | ✅︎ |
-^ You need to set the architecture name via `--hf-overrides` to match the one in vLLM.
- • For example, to use DeepSeek-VL2 series models:
- `--hf-overrides '{"architectures": ["DeepseekVLV2ForCausalLM"]}'`
-E Pre-computed embeddings can be inputted for this modality.
+^ You need to set the architecture name via `--hf-overrides` to match the one in vLLM.
+E Pre-computed embeddings can be inputted for this modality.
+ Multiple items can be inputted per text prompt for this modality.
!!! note
diff --git a/tests/models/registry.py b/tests/models/registry.py
index d139f707f..fe500254b 100644
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -715,7 +715,6 @@ _MULTIMODAL_EXAMPLE_MODELS = {
extras={"fork": "Isotr0py/deepseek-vl2-tiny"},
max_transformers_version="4.48",
transformers_version_reason={"hf": "HF model is not compatible."},
- hf_overrides={"architectures": ["DeepseekVLV2ForCausalLM"]},
),
"DeepseekOCRForCausalLM": _HfExamplesInfo(
"deepseek-ai/DeepSeek-OCR",
diff --git a/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh b/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh
index 58ae42126..673236625 100755
--- a/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh
+++ b/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh
@@ -95,18 +95,6 @@ cleanup_instances() {
sleep 2
}
-# Handle to get model-specific arguments for deepseek
-get_model_args() {
- local model_name=$1
- local extra_args=""
-
- if [[ "$model_name" == "deepseek-ai/deepseek-vl2-tiny" ]]; then
- extra_args="--hf_overrides '{\"architectures\": [\"DeepseekVLV2ForCausalLM\"]}' --trust-remote-code"
- fi
-
- echo "$extra_args"
-}
-
get_num_gpus() {
if [[ "$SMI_BIN" == *"nvidia"* ]]; then
$SMI_BIN --query-gpu=name --format=csv,noheader | wc -l
@@ -127,9 +115,6 @@ run_tests_for_model() {
echo "Testing model: $model_name"
echo "================================"
- # Get model-specific arguments
- local model_args=$(get_model_args "$model_name")
-
# Arrays to store all hosts and ports
PREFILL_HOSTS=()
PREFILL_PORTS=()
@@ -172,11 +157,7 @@ run_tests_for_model() {
BASE_CMD="${BASE_CMD} --attention-backend=$ATTENTION_BACKEND"
fi
- if [ -n "$model_args" ]; then
- FULL_CMD="$BASE_CMD $model_args"
- else
FULL_CMD="$BASE_CMD"
- fi
eval "$FULL_CMD &"
@@ -227,11 +208,7 @@ run_tests_for_model() {
--tensor-parallel-size 1 --enable-expert-parallel"
fi
- if [ -n "$model_args" ]; then
- FULL_CMD="$BASE_CMD $model_args"
- else
FULL_CMD="$BASE_CMD"
- fi
eval "$FULL_CMD &"
diff --git a/tests/v1/kv_connector/nixl_integration/run_edge_case_test.sh b/tests/v1/kv_connector/nixl_integration/run_edge_case_test.sh
index 23b2a0b1c..703a27fd3 100755
--- a/tests/v1/kv_connector/nixl_integration/run_edge_case_test.sh
+++ b/tests/v1/kv_connector/nixl_integration/run_edge_case_test.sh
@@ -55,19 +55,6 @@ cleanup_instances() {
sleep 2
}
-# Handle to get model-specific arguments for deepseek
-get_model_args() {
- local model_name=$1
- local extra_args=""
-
- if [[ "$model_name" == "deepseek-ai/deepseek-vl2-tiny" ]]; then
- extra_args="--hf_overrides '{\"architectures\": [\"DeepseekVLV2ForCausalLM\"]}' --trust-remote-code"
- fi
-
- echo "$extra_args"
-}
-
-
# Function to run tests for a specific model
run_tests_for_model() {
local model_name=$1
@@ -75,9 +62,6 @@ run_tests_for_model() {
echo "Testing model: $model_name"
echo "================================"
- # Get model-specific arguments
- local model_args=$(get_model_args "$model_name")
-
# Start prefill instance
PREFILL_PORT=8001
@@ -87,11 +71,7 @@ run_tests_for_model() {
--gpu-memory-utilization 0.2 \
--kv-transfer-config '$KV_CONFIG'"
- if [ -n "$model_args" ]; then
- FULL_CMD="$BASE_CMD $model_args"
- else
FULL_CMD="$BASE_CMD"
- fi
eval "$FULL_CMD &"
@@ -105,11 +85,7 @@ run_tests_for_model() {
--gpu-memory-utilization 0.2 \
--kv-transfer-config '$KV_CONFIG'"
- if [ -n "$model_args" ]; then
- FULL_CMD="$BASE_CMD $model_args"
- else
FULL_CMD="$BASE_CMD"
- fi
eval "$FULL_CMD &"
diff --git a/vllm/transformers_utils/configs/deepseek_vl2.py b/vllm/transformers_utils/configs/deepseek_vl2.py
index 05067c04c..822e8cdd0 100644
--- a/vllm/transformers_utils/configs/deepseek_vl2.py
+++ b/vllm/transformers_utils/configs/deepseek_vl2.py
@@ -89,6 +89,7 @@ class MlpProjectorConfig(PretrainedConfig):
class DeepseekVLV2Config(PretrainedConfig):
model_type = "deepseek_vl_v2"
+ architectures: list[str] | None = None
vision_config: VisionEncoderConfig
projector_config: MlpProjectorConfig
@@ -105,6 +106,9 @@ class DeepseekVLV2Config(PretrainedConfig):
):
super().__init__(**kwargs)
+ if self.architectures is None:
+ self.architectures = ["DeepseekVLV2ForCausalLM"]
+
vision_config = kwargs.get("vision_config", {})
self.vision_config = VisionEncoderConfig(**vision_config)
@@ -120,8 +124,7 @@ class DeepseekVLV2Config(PretrainedConfig):
self.vocab_size = self.text_config.vocab_size
# update model_type for OCR models
- architectures = self.architectures or kwargs.get("architectures", [])
- if "DeepseekOCRForCausalLM" in architectures:
+ if "DeepseekOCRForCausalLM" in self.architectures:
self.model_type = "deepseek_ocr"
- elif "DeepseekOCR2ForCausalLM" in architectures:
+ elif "DeepseekOCR2ForCausalLM" in self.architectures:
self.model_type = "deepseek_ocr2"