[V1] Prompt logprobs + APC compatibility; prompt logprobs reqs cannot fill APC (#13949)
This commit is contained in:
@@ -6,7 +6,6 @@ from typing import Optional
|
||||
|
||||
import pytest
|
||||
|
||||
from tests.v1.engine.utils import PLP_APC_UNSUPPORTED_MSG
|
||||
from vllm import SamplingParams
|
||||
from vllm.assets.image import ImageAsset
|
||||
from vllm.engine.arg_utils import AsyncEngineArgs
|
||||
@@ -72,41 +71,6 @@ async def generate(engine: AsyncLLM,
|
||||
return count, request_id
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "output_kind", [RequestOutputKind.DELTA, RequestOutputKind.FINAL_ONLY])
@pytest.mark.asyncio
async def test_async_llm_refuses_prompt_logprobs_with_apc(
        monkeypatch, output_kind: RequestOutputKind):
    """Verify that an APC-enabled AsyncLLM rejects prompt-logprobs requests.

    The engine is created with ``enable_prefix_caching=True``. Submitting a
    request that sets ``prompt_logprobs`` must raise ``ValueError`` whose
    message is exactly ``PLP_APC_UNSUPPORTED_MSG``. The test runs once per
    parametrized ``output_kind``.
    """
    # TODO(rickyx): Remove monkeypatch VLLM_USE_V1 setting once we have a
    # better way to test V1 so that in the future when we switch, we don't
    # have to change all the tests.
    monkeypatch.setenv("VLLM_USE_V1", "1")

    # Build an AsyncLLM engine with automatic prefix caching turned on.
    engine = AsyncLLM.from_engine_args(
        AsyncEngineArgs(model="facebook/opt-125m",
                        enable_prefix_caching=True,
                        gpu_memory_utilization=0.8,
                        disable_log_requests=True))

    try:
        with pytest.raises(ValueError) as excinfo:
            # A request with prompt logprobs enabled is expected to fail.
            failing_request = asyncio.create_task(
                generate(engine,
                         "request-0",
                         TEXT_PROMPT,
                         output_kind,
                         10,
                         prompt_logprobs=5))
            await failing_request

        # The error text must match the documented message exactly.
        assert str(excinfo.value) == PLP_APC_UNSUPPORTED_MSG
    finally:
        # Always release the engine, even when the assertions fail.
        engine.shutdown()
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"output_kind", [RequestOutputKind.DELTA, RequestOutputKind.FINAL_ONLY])
|
||||
@pytest.mark.parametrize("engine_args_and_prompt",
|
||||
|
||||
Reference in New Issue
Block a user