[CI Failure] pin nomic-embed-text-v1 revision (#39292)
Signed-off-by: wang.yuqi <yuqi.wang@daocloud.io>
This commit is contained in:
@@ -8,7 +8,13 @@ import pytest
|
|||||||
from ...utils import EmbedModelInfo
|
from ...utils import EmbedModelInfo
|
||||||
|
|
||||||
MODELS = [
|
MODELS = [
|
||||||
EmbedModelInfo("nomic-ai/nomic-embed-text-v1"),
|
EmbedModelInfo(
|
||||||
|
"nomic-ai/nomic-embed-text-v1",
|
||||||
|
# Fixme:
|
||||||
|
# Update nomic-embed code to support the latest
|
||||||
|
# HF version, then remove this pinned revision.
|
||||||
|
revision="720244025c1a7e15661a174c63cce63c8218e52b",
|
||||||
|
),
|
||||||
# EmbedModelInfo("nomic-ai/nomic-embed-text-v1.5"),
|
# EmbedModelInfo("nomic-ai/nomic-embed-text-v1.5"),
|
||||||
# EmbedModelInfo("nomic-ai/CodeRankEmbed"),
|
# EmbedModelInfo("nomic-ai/CodeRankEmbed"),
|
||||||
EmbedModelInfo("nomic-ai/nomic-embed-text-v2-moe"),
|
EmbedModelInfo("nomic-ai/nomic-embed-text-v2-moe"),
|
||||||
@@ -24,7 +30,10 @@ max_model_len = int(original_max_position_embeddings * factor)
|
|||||||
@pytest.mark.parametrize("model_info", MODELS)
|
@pytest.mark.parametrize("model_info", MODELS)
|
||||||
def test_default(model_info, vllm_runner):
|
def test_default(model_info, vllm_runner):
|
||||||
with vllm_runner(
|
with vllm_runner(
|
||||||
model_info.name, runner="pooling", max_model_len=None
|
model_info.name,
|
||||||
|
revision=model_info.revision,
|
||||||
|
runner="pooling",
|
||||||
|
max_model_len=None,
|
||||||
) as vllm_model:
|
) as vllm_model:
|
||||||
model_config = vllm_model.llm.llm_engine.model_config
|
model_config = vllm_model.llm.llm_engine.model_config
|
||||||
if model_info.name == "nomic-ai/nomic-embed-text-v2-moe":
|
if model_info.name == "nomic-ai/nomic-embed-text-v2-moe":
|
||||||
@@ -39,7 +48,10 @@ def test_default(model_info, vllm_runner):
|
|||||||
def test_set_max_model_len_legal(model_info, vllm_runner):
|
def test_set_max_model_len_legal(model_info, vllm_runner):
|
||||||
# set max_model_len <= 512
|
# set max_model_len <= 512
|
||||||
with vllm_runner(
|
with vllm_runner(
|
||||||
model_info.name, runner="pooling", max_model_len=256
|
model_info.name,
|
||||||
|
revision=model_info.revision,
|
||||||
|
runner="pooling",
|
||||||
|
max_model_len=256,
|
||||||
) as vllm_model:
|
) as vllm_model:
|
||||||
model_config = vllm_model.llm.llm_engine.model_config
|
model_config = vllm_model.llm.llm_engine.model_config
|
||||||
assert model_config.max_model_len == 256
|
assert model_config.max_model_len == 256
|
||||||
@@ -49,11 +61,19 @@ def test_set_max_model_len_legal(model_info, vllm_runner):
|
|||||||
# For nomic-embed-text-v2-moe the length is set to 512
|
# For nomic-embed-text-v2-moe the length is set to 512
|
||||||
# by sentence_bert_config.json.
|
# by sentence_bert_config.json.
|
||||||
with pytest.raises(ValueError):
|
with pytest.raises(ValueError):
|
||||||
with vllm_runner(model_info.name, runner="pooling", max_model_len=1024):
|
with vllm_runner(
|
||||||
|
model_info.name,
|
||||||
|
revision=model_info.revision,
|
||||||
|
runner="pooling",
|
||||||
|
max_model_len=1024,
|
||||||
|
):
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
with vllm_runner(
|
with vllm_runner(
|
||||||
model_info.name, runner="pooling", max_model_len=1024
|
model_info.name,
|
||||||
|
revision=model_info.revision,
|
||||||
|
runner="pooling",
|
||||||
|
max_model_len=1024,
|
||||||
) as vllm_model:
|
) as vllm_model:
|
||||||
model_config = vllm_model.llm.llm_engine.model_config
|
model_config = vllm_model.llm.llm_engine.model_config
|
||||||
assert model_config.max_model_len == 1024
|
assert model_config.max_model_len == 1024
|
||||||
@@ -63,7 +83,12 @@ def test_set_max_model_len_legal(model_info, vllm_runner):
|
|||||||
def test_set_max_model_len_illegal(model_info, vllm_runner):
|
def test_set_max_model_len_illegal(model_info, vllm_runner):
|
||||||
# set max_model_len > 2048
|
# set max_model_len > 2048
|
||||||
with pytest.raises(ValueError):
|
with pytest.raises(ValueError):
|
||||||
with vllm_runner(model_info.name, runner="pooling", max_model_len=4096):
|
with vllm_runner(
|
||||||
|
model_info.name,
|
||||||
|
revision=model_info.revision,
|
||||||
|
runner="pooling",
|
||||||
|
max_model_len=4096,
|
||||||
|
):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# set max_model_len > 2048 by hf_overrides
|
# set max_model_len > 2048 by hf_overrides
|
||||||
@@ -71,6 +96,7 @@ def test_set_max_model_len_illegal(model_info, vllm_runner):
|
|||||||
with pytest.raises(ValueError):
|
with pytest.raises(ValueError):
|
||||||
with vllm_runner(
|
with vllm_runner(
|
||||||
model_info.name,
|
model_info.name,
|
||||||
|
revision=model_info.revision,
|
||||||
runner="pooling",
|
runner="pooling",
|
||||||
max_model_len=None,
|
max_model_len=None,
|
||||||
hf_overrides=hf_overrides,
|
hf_overrides=hf_overrides,
|
||||||
@@ -91,7 +117,11 @@ def test_use_rope_scaling_legal(model_info, vllm_runner):
|
|||||||
}
|
}
|
||||||
|
|
||||||
with vllm_runner(
|
with vllm_runner(
|
||||||
model_info.name, runner="pooling", max_model_len=None, hf_overrides=hf_overrides
|
model_info.name,
|
||||||
|
revision=model_info.revision,
|
||||||
|
runner="pooling",
|
||||||
|
max_model_len=None,
|
||||||
|
hf_overrides=hf_overrides,
|
||||||
):
|
):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@@ -110,6 +140,7 @@ def test_use_rope_scaling_illegal(model_info, vllm_runner):
|
|||||||
with pytest.raises(ValueError):
|
with pytest.raises(ValueError):
|
||||||
with vllm_runner(
|
with vllm_runner(
|
||||||
model_info.name,
|
model_info.name,
|
||||||
|
revision=model_info.revision,
|
||||||
runner="pooling",
|
runner="pooling",
|
||||||
max_model_len=max_model_len + 1,
|
max_model_len=max_model_len + 1,
|
||||||
hf_overrides=hf_overrides,
|
hf_overrides=hf_overrides,
|
||||||
@@ -129,6 +160,7 @@ def test_use_rope_scaling_illegal(model_info, vllm_runner):
|
|||||||
with pytest.raises(ValueError):
|
with pytest.raises(ValueError):
|
||||||
with vllm_runner(
|
with vllm_runner(
|
||||||
model_info.name,
|
model_info.name,
|
||||||
|
revision=model_info.revision,
|
||||||
runner="pooling",
|
runner="pooling",
|
||||||
max_model_len=None,
|
max_model_len=None,
|
||||||
hf_overrides=hf_overrides,
|
hf_overrides=hf_overrides,
|
||||||
|
|||||||
@@ -151,6 +151,7 @@ def mteb_test_embed_models(
|
|||||||
|
|
||||||
with vllm_runner(
|
with vllm_runner(
|
||||||
model_info.name,
|
model_info.name,
|
||||||
|
revision=model_info.revision,
|
||||||
runner="pooling",
|
runner="pooling",
|
||||||
max_model_len=model_info.max_model_len,
|
max_model_len=model_info.max_model_len,
|
||||||
**vllm_extra_kwargs,
|
**vllm_extra_kwargs,
|
||||||
@@ -201,6 +202,7 @@ def mteb_test_embed_models(
|
|||||||
if model_info.mteb_score is None:
|
if model_info.mteb_score is None:
|
||||||
with hf_runner(
|
with hf_runner(
|
||||||
model_info.name,
|
model_info.name,
|
||||||
|
revision=model_info.revision,
|
||||||
is_sentence_transformer=True,
|
is_sentence_transformer=True,
|
||||||
dtype=ci_envs.VLLM_CI_HF_DTYPE or model_info.hf_dtype,
|
dtype=ci_envs.VLLM_CI_HF_DTYPE or model_info.hf_dtype,
|
||||||
) as hf_model:
|
) as hf_model:
|
||||||
|
|||||||
@@ -241,6 +241,7 @@ def mteb_test_rerank_models(
|
|||||||
|
|
||||||
with vllm_runner(
|
with vllm_runner(
|
||||||
model_info.name,
|
model_info.name,
|
||||||
|
revision=model_info.revision,
|
||||||
runner="pooling",
|
runner="pooling",
|
||||||
max_model_len=None,
|
max_model_len=None,
|
||||||
max_num_seqs=8,
|
max_num_seqs=8,
|
||||||
@@ -286,7 +287,9 @@ def mteb_test_rerank_models(
|
|||||||
# Accelerate mteb test by setting
|
# Accelerate mteb test by setting
|
||||||
# SentenceTransformers mteb score to a constant
|
# SentenceTransformers mteb score to a constant
|
||||||
if model_info.mteb_score is None:
|
if model_info.mteb_score is None:
|
||||||
with hf_runner(model_info.name, dtype=model_info.hf_dtype) as hf_model:
|
with hf_runner(
|
||||||
|
model_info.name, revision=model_info.revision, dtype=model_info.hf_dtype
|
||||||
|
) as hf_model:
|
||||||
hf_model.chat_template = chat_template
|
hf_model.chat_template = chat_template
|
||||||
st_main_score = run_mteb_rerank(
|
st_main_score = run_mteb_rerank(
|
||||||
hf_model,
|
hf_model,
|
||||||
|
|||||||
@@ -12,6 +12,10 @@ MODELS = [
|
|||||||
EmbedModelInfo(
|
EmbedModelInfo(
|
||||||
"nomic-ai/nomic-embed-text-v1",
|
"nomic-ai/nomic-embed-text-v1",
|
||||||
architecture="NomicBertModel",
|
architecture="NomicBertModel",
|
||||||
|
# Fixme:
|
||||||
|
# Update nomic-embed code to support the latest
|
||||||
|
# HF version, then remove this pinned revision.
|
||||||
|
revision="720244025c1a7e15661a174c63cce63c8218e52b",
|
||||||
mteb_score=0.737568559,
|
mteb_score=0.737568559,
|
||||||
enable_test=True,
|
enable_test=True,
|
||||||
seq_pooling_type="MEAN",
|
seq_pooling_type="MEAN",
|
||||||
|
|||||||
@@ -375,6 +375,7 @@ def softmax(data):
|
|||||||
@dataclass
|
@dataclass
|
||||||
class ModelInfo:
|
class ModelInfo:
|
||||||
name: str
|
name: str
|
||||||
|
revision: str | None = None
|
||||||
architecture: str = ""
|
architecture: str = ""
|
||||||
dtype: str = "auto"
|
dtype: str = "auto"
|
||||||
max_model_len: int | None = None
|
max_model_len: int | None = None
|
||||||
|
|||||||
Reference in New Issue
Block a user