[Deprecation] Remove prompt_token_ids arg fallback in LLM.generate and LLM.embed (#18800)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -5,11 +5,9 @@ import weakref
|
||||
|
||||
import pytest
|
||||
|
||||
from vllm import LLM, PoolingParams, PoolingRequestOutput
|
||||
from vllm import LLM, PoolingParams
|
||||
from vllm.distributed import cleanup_dist_env_and_memory
|
||||
|
||||
from ...models.utils import check_embeddings_close
|
||||
|
||||
MODEL_NAME = "intfloat/multilingual-e5-small"
|
||||
|
||||
PROMPTS = [
|
||||
@@ -48,57 +46,13 @@ def llm():
|
||||
enforce_eager=True,
|
||||
seed=0)
|
||||
|
||||
with llm.deprecate_legacy_api():
|
||||
yield weakref.proxy(llm)
|
||||
yield weakref.proxy(llm)
|
||||
|
||||
del llm
|
||||
del llm
|
||||
|
||||
cleanup_dist_env_and_memory()
|
||||
|
||||
|
||||
def assert_outputs_match(o1: list[PoolingRequestOutput],
|
||||
o2: list[PoolingRequestOutput]):
|
||||
check_embeddings_close(
|
||||
embeddings_0_lst=[o.outputs.data for o in o1],
|
||||
embeddings_1_lst=[o.outputs.data for o in o2],
|
||||
name_0="hf",
|
||||
name_1="vllm",
|
||||
tol=1e-2,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.skip_global_cleanup
|
||||
@pytest.mark.parametrize('prompt_token_ids', TOKEN_IDS)
|
||||
def test_v1_v2_api_consistency_single_prompt_tokens(llm: LLM,
|
||||
prompt_token_ids):
|
||||
pooling_params = PoolingParams()
|
||||
|
||||
with pytest.warns(DeprecationWarning, match="'prompt_token_ids'"):
|
||||
v1_output = llm.encode(prompt_token_ids=prompt_token_ids,
|
||||
pooling_params=pooling_params)
|
||||
|
||||
v2_output = llm.encode({"prompt_token_ids": prompt_token_ids},
|
||||
pooling_params=pooling_params)
|
||||
assert_outputs_match(v1_output, v2_output)
|
||||
|
||||
|
||||
@pytest.mark.skip_global_cleanup
|
||||
def test_v1_v2_api_consistency_multi_prompt_tokens(llm: LLM):
|
||||
pooling_params = PoolingParams()
|
||||
|
||||
with pytest.warns(DeprecationWarning, match="'prompt_token_ids'"):
|
||||
v1_output = llm.encode(prompt_token_ids=TOKEN_IDS,
|
||||
pooling_params=pooling_params)
|
||||
|
||||
v2_output = llm.encode(
|
||||
[{
|
||||
"prompt_token_ids": p
|
||||
} for p in TOKEN_IDS],
|
||||
pooling_params=pooling_params,
|
||||
)
|
||||
assert_outputs_match(v1_output, v2_output)
|
||||
|
||||
|
||||
@pytest.mark.skip_global_cleanup
|
||||
def test_multiple_pooling_params(llm: LLM):
|
||||
pooling_params = [
|
||||
|
||||
Reference in New Issue
Block a user