[Misc] unify variable for LLM instance (#20996)

Signed-off-by: Andy Xie <andy.xning@gmail.com>
This commit is contained in:
Ning Xie
2025-07-21 19:18:33 +08:00
committed by GitHub
parent e6b90a2805
commit d97841078b
53 changed files with 237 additions and 236 deletions

View File

@@ -30,7 +30,7 @@ class VllmMtebEncoder(mteb.Encoder):
def __init__(self, vllm_model):
super().__init__()
self.model = vllm_model
self.llm = vllm_model
self.rng = np.random.default_rng(seed=42)
def encode(
@@ -43,7 +43,7 @@ class VllmMtebEncoder(mteb.Encoder):
# issues by randomizing the order.
r = self.rng.permutation(len(sentences))
sentences = [sentences[i] for i in r]
outputs = self.model.embed(sentences, use_tqdm=False)
outputs = self.llm.embed(sentences, use_tqdm=False)
embeds = np.array(outputs)
embeds = embeds[np.argsort(r)]
return embeds
@@ -61,10 +61,10 @@ class VllmMtebEncoder(mteb.Encoder):
queries = [s[0] for s in sentences]
corpus = [s[1] for s in sentences]
outputs = self.model.score(queries,
corpus,
truncate_prompt_tokens=-1,
use_tqdm=False)
outputs = self.llm.score(queries,
corpus,
truncate_prompt_tokens=-1,
use_tqdm=False)
scores = np.array(outputs)
scores = scores[np.argsort(r)]
return scores
@@ -178,11 +178,11 @@ def mteb_test_embed_models(hf_runner,
if model_info.architecture:
assert (model_info.architecture
in vllm_model.model.llm_engine.model_config.architectures)
in vllm_model.llm.llm_engine.model_config.architectures)
vllm_main_score = run_mteb_embed_task(VllmMtebEncoder(vllm_model),
MTEB_EMBED_TASKS)
vllm_dtype = vllm_model.model.llm_engine.model_config.dtype
vllm_dtype = vllm_model.llm.llm_engine.model_config.dtype
with hf_runner(model_info.name,
is_sentence_transformer=True,
@@ -284,7 +284,7 @@ def mteb_test_rerank_models(hf_runner,
max_num_seqs=8,
**vllm_extra_kwargs) as vllm_model:
model_config = vllm_model.model.llm_engine.model_config
model_config = vllm_model.llm.llm_engine.model_config
if model_info.architecture:
assert (model_info.architecture in model_config.architectures)

View File

@@ -120,7 +120,7 @@ def test_gritlm_offline_embedding(vllm_runner):
task="embed",
max_model_len=MAX_MODEL_LEN,
) as vllm_model:
llm = vllm_model.model
llm = vllm_model.llm
d_rep = run_llm_encode(
llm,
@@ -167,7 +167,7 @@ def test_gritlm_offline_generate(monkeypatch: pytest.MonkeyPatch, vllm_runner):
task="generate",
max_model_len=MAX_MODEL_LEN,
) as vllm_model:
llm = vllm_model.model
llm = vllm_model.llm
sampling_params = SamplingParams(temperature=0.0, max_tokens=256)
outputs = llm.generate(input, sampling_params=sampling_params)

View File

@@ -87,10 +87,10 @@ def test_matryoshka(
task="embed",
dtype=dtype,
max_model_len=None) as vllm_model:
assert vllm_model.model.llm_engine.model_config.is_matryoshka
assert vllm_model.llm.llm_engine.model_config.is_matryoshka
matryoshka_dimensions = (
vllm_model.model.llm_engine.model_config.matryoshka_dimensions)
vllm_model.llm.llm_engine.model_config.matryoshka_dimensions)
assert matryoshka_dimensions is not None
if dimensions not in matryoshka_dimensions:

View File

@@ -23,7 +23,7 @@ max_model_len = int(original_max_position_embeddings * factor)
def test_default(model_info, vllm_runner):
with vllm_runner(model_info.name, task="embed",
max_model_len=None) as vllm_model:
model_config = vllm_model.model.llm_engine.model_config
model_config = vllm_model.llm.llm_engine.model_config
if model_info.name == "nomic-ai/nomic-embed-text-v2-moe":
# For nomic-embed-text-v2-moe the length is set to 512
# by sentence_bert_config.json.
@@ -38,7 +38,7 @@ def test_set_max_model_len_legal(model_info, vllm_runner):
# set max_model_len <= 512
with vllm_runner(model_info.name, task="embed",
max_model_len=256) as vllm_model:
model_config = vllm_model.model.llm_engine.model_config
model_config = vllm_model.llm.llm_engine.model_config
assert model_config.max_model_len == 256
# set 512 < max_model_len <= 2048
@@ -52,7 +52,7 @@ def test_set_max_model_len_legal(model_info, vllm_runner):
else:
with vllm_runner(model_info.name, task="embed",
max_model_len=1024) as vllm_model:
model_config = vllm_model.model.llm_engine.model_config
model_config = vllm_model.llm.llm_engine.model_config
assert model_config.max_model_len == 1024

View File

@@ -28,7 +28,7 @@ def test_smaller_truncation_size(vllm_runner,
with vllm_runner(model_name, task="embed",
max_model_len=max_model_len) as vllm_model:
vllm_output = vllm_model.model.encode(
vllm_output = vllm_model.llm.encode(
input_str, truncate_prompt_tokens=truncate_prompt_tokens)
prompt_tokens = vllm_output[0].prompt_token_ids
@@ -43,7 +43,7 @@ def test_max_truncation_size(vllm_runner,
with vllm_runner(model_name, task="embed",
max_model_len=max_model_len) as vllm_model:
vllm_output = vllm_model.model.encode(
vllm_output = vllm_model.llm.encode(
input_str, truncate_prompt_tokens=truncate_prompt_tokens)
prompt_tokens = vllm_output[0].prompt_token_ids
@@ -61,7 +61,7 @@ def test_bigger_truncation_size(vllm_runner,
model_name, task="embed",
max_model_len=max_model_len) as vllm_model:
llm_output = vllm_model.model.encode(
llm_output = vllm_model.llm.encode(
input_str, truncate_prompt_tokens=truncate_prompt_tokens)
assert llm_output == f"""truncate_prompt_tokens value