[Misc] unify variable for LLM instance (#20996)
Signed-off-by: Andy Xie <andy.xning@gmail.com>
This commit is contained in:
@@ -180,8 +180,7 @@ def test_chat(
|
||||
) as vllm_model:
|
||||
outputs = []
|
||||
for msg in MSGS:
|
||||
output = vllm_model.model.chat(msg,
|
||||
sampling_params=SAMPLING_PARAMS)
|
||||
output = vllm_model.llm.chat(msg, sampling_params=SAMPLING_PARAMS)
|
||||
|
||||
outputs.extend(output)
|
||||
|
||||
@@ -217,7 +216,7 @@ def test_multi_modal_placeholders(vllm_runner, prompt,
|
||||
max_model_len=8192,
|
||||
limit_mm_per_prompt=LIMIT_MM_PER_PROMPT,
|
||||
) as vllm_model:
|
||||
outputs = vllm_model.model.generate(prompt)
|
||||
outputs = vllm_model.llm.generate(prompt)
|
||||
|
||||
assert len(outputs) == 1, f"{len(outputs)=}"
|
||||
output: RequestOutput = outputs[0]
|
||||
|
||||
@@ -106,7 +106,7 @@ def run_test(
|
||||
tensor_parallel_size=tensor_parallel_size,
|
||||
distributed_executor_backend=distributed_executor_backend,
|
||||
) as vllm_model:
|
||||
llm = vllm_model.model
|
||||
llm = vllm_model.llm
|
||||
|
||||
sampling_params = SamplingParams(
|
||||
temperature=0,
|
||||
|
||||
@@ -85,7 +85,7 @@ def run_test(
|
||||
enforce_eager=enforce_eager,
|
||||
task=task,
|
||||
**vllm_runner_kwargs_) as vllm_model:
|
||||
tokenizer = vllm_model.model.get_tokenizer()
|
||||
tokenizer = vllm_model.llm.get_tokenizer()
|
||||
|
||||
vllm_kwargs: dict[str, Any] = {}
|
||||
if get_stop_token_ids is not None:
|
||||
|
||||
@@ -96,7 +96,7 @@ def _run_test(
|
||||
dtype=dtype,
|
||||
enforce_eager=True,
|
||||
max_model_len=8192) as vllm_model:
|
||||
tokenizer = vllm_model.model.get_tokenizer()
|
||||
tokenizer = vllm_model.llm.get_tokenizer()
|
||||
texts = [
|
||||
# this is necessary because vllm_model.embed will not apply any
|
||||
# templating to the prompt, and therefore lacks an image_pad
|
||||
|
||||
@@ -56,7 +56,7 @@ def vllm_reranker(
|
||||
mm_processor_kwargs=mm_processor_kwargs,
|
||||
limit_mm_per_prompt=limit_mm_per_prompt,
|
||||
) as vllm_model:
|
||||
outputs = vllm_model.model.score(query, documents)
|
||||
outputs = vllm_model.llm.score(query, documents)
|
||||
|
||||
return [output.outputs.score for output in outputs]
|
||||
|
||||
|
||||
Reference in New Issue
Block a user