[Misc] unify variable for LLM instance (#20996)

Signed-off-by: Andy Xie <andy.xning@gmail.com>
This commit is contained in:
Ning Xie
2025-07-21 19:18:33 +08:00
committed by GitHub
parent e6b90a2805
commit d97841078b
53 changed files with 237 additions and 236 deletions

View File

@@ -28,7 +28,7 @@ def vllm_model(vllm_runner):
 def test_stop_reason(vllm_model, example_prompts):
     tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL)
     stop_token_id = tokenizer.convert_tokens_to_ids(STOP_STR)
-    llm = vllm_model.model
+    llm = vllm_model.llm
     # test stop token
     outputs = llm.generate(example_prompts,

View File

@@ -101,42 +101,42 @@ def _stop_token_id(llm):
 def test_stop_strings():
     # If V0, must set enforce_eager=False since we use
     # async output processing below.
-    vllm_model = LLM(MODEL, enforce_eager=envs.VLLM_USE_V1)
+    llm = LLM(MODEL, enforce_eager=envs.VLLM_USE_V1)
     if envs.VLLM_USE_V1:
-        _stop_basic(vllm_model)
+        _stop_basic(llm)
     else:
-        _set_async_mode(vllm_model, True)
-        _stop_basic(vllm_model)
+        _set_async_mode(llm, True)
+        _stop_basic(llm)
-        _set_async_mode(vllm_model, False)
-        _stop_basic(vllm_model)
+        _set_async_mode(llm, False)
+        _stop_basic(llm)
     if envs.VLLM_USE_V1:
-        _stop_multi_tokens(vllm_model)
+        _stop_multi_tokens(llm)
     else:
-        _set_async_mode(vllm_model, True)
-        _stop_multi_tokens(vllm_model)
+        _set_async_mode(llm, True)
+        _stop_multi_tokens(llm)
-        _set_async_mode(vllm_model, False)
-        _stop_multi_tokens(vllm_model)
+        _set_async_mode(llm, False)
+        _stop_multi_tokens(llm)
     if envs.VLLM_USE_V1:
-        _stop_partial_token(vllm_model)
+        _stop_partial_token(llm)
     else:
-        _set_async_mode(vllm_model, True)
-        _stop_partial_token(vllm_model)
+        _set_async_mode(llm, True)
+        _stop_partial_token(llm)
-        _set_async_mode(vllm_model, False)
-        _stop_partial_token(vllm_model)
+        _set_async_mode(llm, False)
+        _stop_partial_token(llm)
     if envs.VLLM_USE_V1:
         # FIXME: this does not respect include_in_output=False
-        # _stop_token_id(vllm_model)
+        # _stop_token_id(llm)
         pass
     else:
-        _set_async_mode(vllm_model, True)
-        _stop_token_id(vllm_model)
+        _set_async_mode(llm, True)
+        _stop_token_id(llm)
-        _set_async_mode(vllm_model, False)
-        _stop_token_id(vllm_model)
+        _set_async_mode(llm, False)
+        _stop_token_id(llm)