[CI Failure] pin nomic-embed-text-v1 revision (#39292)
Signed-off-by: wang.yuqi <yuqi.wang@daocloud.io>
This commit is contained in:
@@ -8,7 +8,13 @@ import pytest
|
||||
from ...utils import EmbedModelInfo
|
||||
|
||||
MODELS = [
|
||||
EmbedModelInfo("nomic-ai/nomic-embed-text-v1"),
|
||||
EmbedModelInfo(
|
||||
"nomic-ai/nomic-embed-text-v1",
|
||||
# Fixme:
|
||||
# Update nomic-embed code to support the latest
|
||||
# HF version, then remove this pinned revision.
|
||||
revision="720244025c1a7e15661a174c63cce63c8218e52b",
|
||||
),
|
||||
# EmbedModelInfo("nomic-ai/nomic-embed-text-v1.5"),
|
||||
# EmbedModelInfo("nomic-ai/CodeRankEmbed"),
|
||||
EmbedModelInfo("nomic-ai/nomic-embed-text-v2-moe"),
|
||||
@@ -24,7 +30,10 @@ max_model_len = int(original_max_position_embeddings * factor)
|
||||
@pytest.mark.parametrize("model_info", MODELS)
|
||||
def test_default(model_info, vllm_runner):
|
||||
with vllm_runner(
|
||||
model_info.name, runner="pooling", max_model_len=None
|
||||
model_info.name,
|
||||
revision=model_info.revision,
|
||||
runner="pooling",
|
||||
max_model_len=None,
|
||||
) as vllm_model:
|
||||
model_config = vllm_model.llm.llm_engine.model_config
|
||||
if model_info.name == "nomic-ai/nomic-embed-text-v2-moe":
|
||||
@@ -39,7 +48,10 @@ def test_default(model_info, vllm_runner):
|
||||
def test_set_max_model_len_legal(model_info, vllm_runner):
|
||||
# set max_model_len <= 512
|
||||
with vllm_runner(
|
||||
model_info.name, runner="pooling", max_model_len=256
|
||||
model_info.name,
|
||||
revision=model_info.revision,
|
||||
runner="pooling",
|
||||
max_model_len=256,
|
||||
) as vllm_model:
|
||||
model_config = vllm_model.llm.llm_engine.model_config
|
||||
assert model_config.max_model_len == 256
|
||||
@@ -49,11 +61,19 @@ def test_set_max_model_len_legal(model_info, vllm_runner):
|
||||
# For nomic-embed-text-v2-moe the length is set to 512
|
||||
# by sentence_bert_config.json.
|
||||
with pytest.raises(ValueError):
|
||||
with vllm_runner(model_info.name, runner="pooling", max_model_len=1024):
|
||||
with vllm_runner(
|
||||
model_info.name,
|
||||
revision=model_info.revision,
|
||||
runner="pooling",
|
||||
max_model_len=1024,
|
||||
):
|
||||
pass
|
||||
else:
|
||||
with vllm_runner(
|
||||
model_info.name, runner="pooling", max_model_len=1024
|
||||
model_info.name,
|
||||
revision=model_info.revision,
|
||||
runner="pooling",
|
||||
max_model_len=1024,
|
||||
) as vllm_model:
|
||||
model_config = vllm_model.llm.llm_engine.model_config
|
||||
assert model_config.max_model_len == 1024
|
||||
@@ -63,7 +83,12 @@ def test_set_max_model_len_legal(model_info, vllm_runner):
|
||||
def test_set_max_model_len_illegal(model_info, vllm_runner):
|
||||
# set max_model_len > 2048
|
||||
with pytest.raises(ValueError):
|
||||
with vllm_runner(model_info.name, runner="pooling", max_model_len=4096):
|
||||
with vllm_runner(
|
||||
model_info.name,
|
||||
revision=model_info.revision,
|
||||
runner="pooling",
|
||||
max_model_len=4096,
|
||||
):
|
||||
pass
|
||||
|
||||
# set max_model_len > 2048 by hf_overrides
|
||||
@@ -71,6 +96,7 @@ def test_set_max_model_len_illegal(model_info, vllm_runner):
|
||||
with pytest.raises(ValueError):
|
||||
with vllm_runner(
|
||||
model_info.name,
|
||||
revision=model_info.revision,
|
||||
runner="pooling",
|
||||
max_model_len=None,
|
||||
hf_overrides=hf_overrides,
|
||||
@@ -91,7 +117,11 @@ def test_use_rope_scaling_legal(model_info, vllm_runner):
|
||||
}
|
||||
|
||||
with vllm_runner(
|
||||
model_info.name, runner="pooling", max_model_len=None, hf_overrides=hf_overrides
|
||||
model_info.name,
|
||||
revision=model_info.revision,
|
||||
runner="pooling",
|
||||
max_model_len=None,
|
||||
hf_overrides=hf_overrides,
|
||||
):
|
||||
pass
|
||||
|
||||
@@ -110,6 +140,7 @@ def test_use_rope_scaling_illegal(model_info, vllm_runner):
|
||||
with pytest.raises(ValueError):
|
||||
with vllm_runner(
|
||||
model_info.name,
|
||||
revision=model_info.revision,
|
||||
runner="pooling",
|
||||
max_model_len=max_model_len + 1,
|
||||
hf_overrides=hf_overrides,
|
||||
@@ -129,6 +160,7 @@ def test_use_rope_scaling_illegal(model_info, vllm_runner):
|
||||
with pytest.raises(ValueError):
|
||||
with vllm_runner(
|
||||
model_info.name,
|
||||
revision=model_info.revision,
|
||||
runner="pooling",
|
||||
max_model_len=None,
|
||||
hf_overrides=hf_overrides,
|
||||
|
||||
@@ -151,6 +151,7 @@ def mteb_test_embed_models(
|
||||
|
||||
with vllm_runner(
|
||||
model_info.name,
|
||||
revision=model_info.revision,
|
||||
runner="pooling",
|
||||
max_model_len=model_info.max_model_len,
|
||||
**vllm_extra_kwargs,
|
||||
@@ -201,6 +202,7 @@ def mteb_test_embed_models(
|
||||
if model_info.mteb_score is None:
|
||||
with hf_runner(
|
||||
model_info.name,
|
||||
revision=model_info.revision,
|
||||
is_sentence_transformer=True,
|
||||
dtype=ci_envs.VLLM_CI_HF_DTYPE or model_info.hf_dtype,
|
||||
) as hf_model:
|
||||
|
||||
@@ -241,6 +241,7 @@ def mteb_test_rerank_models(
|
||||
|
||||
with vllm_runner(
|
||||
model_info.name,
|
||||
revision=model_info.revision,
|
||||
runner="pooling",
|
||||
max_model_len=None,
|
||||
max_num_seqs=8,
|
||||
@@ -286,7 +287,9 @@ def mteb_test_rerank_models(
|
||||
# Accelerate mteb test by setting
|
||||
# SentenceTransformers mteb score to a constant
|
||||
if model_info.mteb_score is None:
|
||||
with hf_runner(model_info.name, dtype=model_info.hf_dtype) as hf_model:
|
||||
with hf_runner(
|
||||
model_info.name, revision=model_info.revision, dtype=model_info.hf_dtype
|
||||
) as hf_model:
|
||||
hf_model.chat_template = chat_template
|
||||
st_main_score = run_mteb_rerank(
|
||||
hf_model,
|
||||
|
||||
@@ -12,6 +12,10 @@ MODELS = [
|
||||
EmbedModelInfo(
|
||||
"nomic-ai/nomic-embed-text-v1",
|
||||
architecture="NomicBertModel",
|
||||
# Fixme:
|
||||
# Update nomic-embed code to support the latest
|
||||
# HF version, then remove this pinned revision.
|
||||
revision="720244025c1a7e15661a174c63cce63c8218e52b",
|
||||
mteb_score=0.737568559,
|
||||
enable_test=True,
|
||||
seq_pooling_type="MEAN",
|
||||
|
||||
@@ -375,6 +375,7 @@ def softmax(data):
|
||||
@dataclass
|
||||
class ModelInfo:
|
||||
name: str
|
||||
revision: str | None = None
|
||||
architecture: str = ""
|
||||
dtype: str = "auto"
|
||||
max_model_len: int | None = None
|
||||
|
||||
Reference in New Issue
Block a user