diff --git a/examples/pooling/classify/vision_classification_online.py b/examples/pooling/classify/vision_classification_online.py
index 021d3dfe5..624f6beb5 100644
--- a/examples/pooling/classify/vision_classification_online.py
+++ b/examples/pooling/classify/vision_classification_online.py
@@ -8,7 +8,7 @@ NOTE:
     --runner pooling \
     --max-model-len 5000 \
     --limit-mm-per-prompt.video 1 \
-    --hf-overrides '{"text_config": {"architectures": ["Qwen2_5_VLForSequenceClassification"]}}'
+    --hf-overrides '{"architectures": ["Qwen2_5_VLForSequenceClassification"]}'
 """
 
 import argparse
diff --git a/tests/entrypoints/pooling/classify/test_online_vision.py b/tests/entrypoints/pooling/classify/test_online_vision.py
index 312bb6fe5..2776dc8d8 100644
--- a/tests/entrypoints/pooling/classify/test_online_vision.py
+++ b/tests/entrypoints/pooling/classify/test_online_vision.py
@@ -12,11 +12,7 @@ from vllm.multimodal.utils import encode_image_url, fetch_image
 
 MODEL_NAME = "muziyongshixin/Qwen2.5-VL-7B-for-VideoCls"
 MAXIMUM_VIDEOS = 1
-HF_OVERRIDES = {
-    "text_config": {
-        "architectures": ["Qwen2_5_VLForSequenceClassification"],
-    },
-}
+HF_OVERRIDES = {"architectures": ["Qwen2_5_VLForSequenceClassification"]}
 input_text = "This product was excellent and exceeded my expectations"
 image_url = "https://vllm-public-assets.s3.us-west-2.amazonaws.com/multimodal_asset/cat_snow.jpg"
 image_base64 = {"url": encode_image_url(fetch_image(image_url))}