[VLM] Disallow overflowing max_model_len for multimodal models (#7998)

This commit is contained in:
Cyrus Leung
2024-08-30 08:49:04 +08:00
committed by GitHub
parent 0c785d344d
commit 4abed65c58
2 changed files with 35 additions and 3 deletions

View File

@@ -179,3 +179,20 @@ def test_models(hf_runner, vllm_runner, image_assets, model, size_factors,
num_logprobs=num_logprobs,
tensor_parallel_size=1,
)
@pytest.mark.parametrize("model", models)
def test_context_length_too_short(vllm_runner, image_assets, model):
images = [asset.pil_image for asset in image_assets]
with pytest.raises(ValueError, match="too long to fit into the model"):
vllm_model = vllm_runner(
model,
max_model_len=128, # LLaVA has a feature size of 576
enforce_eager=True,
)
with vllm_model:
vllm_model.generate_greedy([HF_IMAGE_PROMPTS[0]],
max_tokens=1,
images=[images[0]])