[VLM] Disallow overflowing max_model_len for multimodal models (#7998)

2024-08-30 08:49:04 +08:00
parent 0c785d344d
commit 4abed65c58
2 changed files with 35 additions and 3 deletions
--- a/tests/models/test_llava.py
+++ b/tests/models/test_llava.py
@@ -179,3 +179,20 @@ def test_models(hf_runner, vllm_runner, image_assets, model, size_factors,
        num_logprobs=num_logprobs,
        tensor_parallel_size=1,
    )
+
+
+@pytest.mark.parametrize("model", models)
+def test_context_length_too_short(vllm_runner, image_assets, model):
+    images = [asset.pil_image for asset in image_assets]
+
+    with pytest.raises(ValueError, match="too long to fit into the model"):
+        vllm_model = vllm_runner(  # construction is inside raises too
+            model,
+            max_model_len=128,  # below LLaVA's feature size of 576
+            enforce_eager=True,
+        )
+
+        with vllm_model:
+            vllm_model.generate_greedy([HF_IMAGE_PROMPTS[0]],
+                                       max_tokens=1,
+                                       images=[images[0]])