[V1] Prompt logprobs + APC compatibility; prompt logprobs reqs cannot fill APC (#13949)

This commit is contained in:
afeldman-nm
2025-03-07 20:48:12 -05:00
committed by GitHub
parent 66e16a038e
commit ef64044079
9 changed files with 291 additions and 161 deletions

View File

@@ -5,7 +5,6 @@ from typing import Optional
import pytest
from tests.v1.engine.utils import PLP_APC_UNSUPPORTED_MSG
from vllm import LLM, SamplingParams
MODEL = "facebook/opt-125m"
@@ -98,17 +97,3 @@ def test_parallel_sampling(vllm_model, example_prompts) -> None:
raise AssertionError(
f"{len(completion_counts)} unique completions; expected"
f" {n}. Repeats: {repeats}")
def test_llm_engine_refuses_prompt_logprobs_with_apc(vllm_model_apc):
"""Test passes if LLMEngine raises an exception when it is configured
for automatic prefix caching and it receives a request with
prompt_logprobs enabled, which is incompatible."""
model: LLM = vllm_model_apc.model
with pytest.raises(ValueError) as excinfo:
model.generate(
"Hello, my name is",
SamplingParams(temperature=0.8, top_p=0.95, prompt_logprobs=5))
# Validate exception string is correct
assert str(excinfo.value) == PLP_APC_UNSUPPORTED_MSG