diff --git a/tests/entrypoints/openai/test_tokenization_vlm.py b/tests/entrypoints/openai/test_tokenization_vlm.py
new file mode 100644
index 000000000..c84ac3cf7
--- /dev/null
+++ b/tests/entrypoints/openai/test_tokenization_vlm.py
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Regression test: ``/tokenize`` must expand image placeholders for VLM models.
+
+Fixed by PR #34560 ("Move InputPreprocessor into Renderer (2/2)").
+Before that change, ``/tokenize`` returned ~26 tokens for a message with an
+image instead of the expected 1451. Confirmed broken on 0.15.1 and 0.16.0.
+"""
+
+import json
+
+import pytest
+import requests
+
+from ...utils import RemoteOpenAIServer
+
+MODEL_NAME = "Qwen/Qwen2.5-VL-3B-Instruct"
+
+
+@pytest.fixture(scope="module")
+def server():
+    """Yield a module-scoped ``RemoteOpenAIServer`` built for ``MODEL_NAME``."""
+    args = [
+        "--dtype",
+        "bfloat16",
+        "--max-model-len",
+        "4096",
+        "--max-num-seqs",
+        "5",
+        "--enforce-eager",
+        "--limit-mm-per-prompt",
+        json.dumps({"image": 1}),
+    ]
+    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
+        yield remote_server
+
+
+def test_tokenize_chat_expands_image_placeholders(
+    server: RemoteOpenAIServer,
+    local_asset_server,
+):
+    """POST an image+text chat message to ``tokenize`` and check the count."""
+    image_url = local_asset_server.url_for("stop_sign.jpg")
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "image_url", "image_url": {"url": image_url}},
+                {"type": "text", "text": "Describe this image."},
+            ],
+        }
+    ]
+
+    response = requests.post(
+        server.url_for("tokenize"),
+        json={"model": MODEL_NAME, "messages": messages},
+        # requests never times out by default; bound the wait so a hung
+        # server fails this test instead of stalling the whole run.
+        timeout=60,
+    )
+    response.raise_for_status()
+
+    # stop_sign.jpg (1300x876) produces 1451 tokens after expansion.
+    # Without expansion the count would be ~26 (text + one placeholder).
+    assert response.json()["count"] == 1451