diff --git a/tests/models/language/pooling/test_nomic_max_model_len.py b/tests/models/language/pooling/test_nomic_max_model_len.py
index d6216a87a..b5630ca96 100644
--- a/tests/models/language/pooling/test_nomic_max_model_len.py
+++ b/tests/models/language/pooling/test_nomic_max_model_len.py
@@ -8,7 +8,13 @@ import pytest
 from ...utils import EmbedModelInfo
 
 MODELS = [
-    EmbedModelInfo("nomic-ai/nomic-embed-text-v1"),
+    EmbedModelInfo(
+        "nomic-ai/nomic-embed-text-v1",
+        # Fixme:
+        # Update nomic-embed code to support the latest
+        # HF version and remove revision set.
+        revision="720244025c1a7e15661a174c63cce63c8218e52b",
+    ),
     # EmbedModelInfo("nomic-ai/nomic-embed-text-v1.5"),
     # EmbedModelInfo("nomic-ai/CodeRankEmbed"),
     EmbedModelInfo("nomic-ai/nomic-embed-text-v2-moe"),
@@ -24,7 +30,10 @@ max_model_len = int(original_max_position_embeddings * factor)
 @pytest.mark.parametrize("model_info", MODELS)
 def test_default(model_info, vllm_runner):
     with vllm_runner(
-        model_info.name, runner="pooling", max_model_len=None
+        model_info.name,
+        revision=model_info.revision,
+        runner="pooling",
+        max_model_len=None,
     ) as vllm_model:
         model_config = vllm_model.llm.llm_engine.model_config
         if model_info.name == "nomic-ai/nomic-embed-text-v2-moe":
@@ -39,7 +48,10 @@ def test_default(model_info, vllm_runner):
 def test_set_max_model_len_legal(model_info, vllm_runner):
     # set max_model_len <= 512
     with vllm_runner(
-        model_info.name, runner="pooling", max_model_len=256
+        model_info.name,
+        revision=model_info.revision,
+        runner="pooling",
+        max_model_len=256,
     ) as vllm_model:
         model_config = vllm_model.llm.llm_engine.model_config
         assert model_config.max_model_len == 256
@@ -49,11 +61,19 @@ def test_set_max_model_len_legal(model_info, vllm_runner):
         # For nomic-embed-text-v2-moe the length is set to 512
         # by sentence_bert_config.json.
         with pytest.raises(ValueError):
-            with vllm_runner(model_info.name, runner="pooling", max_model_len=1024):
+            with vllm_runner(
+                model_info.name,
+                revision=model_info.revision,
+                runner="pooling",
+                max_model_len=1024,
+            ):
                 pass
     else:
         with vllm_runner(
-            model_info.name, runner="pooling", max_model_len=1024
+            model_info.name,
+            revision=model_info.revision,
+            runner="pooling",
+            max_model_len=1024,
         ) as vllm_model:
             model_config = vllm_model.llm.llm_engine.model_config
             assert model_config.max_model_len == 1024
@@ -63,7 +83,12 @@ def test_set_max_model_len_legal(model_info, vllm_runner):
 def test_set_max_model_len_illegal(model_info, vllm_runner):
     # set max_model_len > 2048
     with pytest.raises(ValueError):
-        with vllm_runner(model_info.name, runner="pooling", max_model_len=4096):
+        with vllm_runner(
+            model_info.name,
+            revision=model_info.revision,
+            runner="pooling",
+            max_model_len=4096,
+        ):
             pass
 
     # set max_model_len > 2048 by hf_overrides
@@ -71,6 +96,7 @@ def test_set_max_model_len_illegal(model_info, vllm_runner):
     with pytest.raises(ValueError):
         with vllm_runner(
             model_info.name,
+            revision=model_info.revision,
             runner="pooling",
             max_model_len=None,
             hf_overrides=hf_overrides,
@@ -91,7 +117,11 @@ def test_use_rope_scaling_legal(model_info, vllm_runner):
     }
 
     with vllm_runner(
-        model_info.name, runner="pooling", max_model_len=None, hf_overrides=hf_overrides
+        model_info.name,
+        revision=model_info.revision,
+        runner="pooling",
+        max_model_len=None,
+        hf_overrides=hf_overrides,
     ):
         pass
 
@@ -110,6 +140,7 @@ def test_use_rope_scaling_illegal(model_info, vllm_runner):
     with pytest.raises(ValueError):
         with vllm_runner(
             model_info.name,
+            revision=model_info.revision,
             runner="pooling",
             max_model_len=max_model_len + 1,
             hf_overrides=hf_overrides,
@@ -129,6 +160,7 @@ def test_use_rope_scaling_illegal(model_info, vllm_runner):
     with pytest.raises(ValueError):
         with vllm_runner(
             model_info.name,
+            revision=model_info.revision,
             runner="pooling",
             max_model_len=None,
             hf_overrides=hf_overrides,
diff --git a/tests/models/language/pooling_mteb_test/mteb_embed_utils.py b/tests/models/language/pooling_mteb_test/mteb_embed_utils.py
index da0b16449..34b758d22 100644
--- a/tests/models/language/pooling_mteb_test/mteb_embed_utils.py
+++ b/tests/models/language/pooling_mteb_test/mteb_embed_utils.py
@@ -151,6 +151,7 @@ def mteb_test_embed_models(
 
     with vllm_runner(
         model_info.name,
+        revision=model_info.revision,
         runner="pooling",
         max_model_len=model_info.max_model_len,
         **vllm_extra_kwargs,
@@ -201,6 +202,7 @@ def mteb_test_embed_models(
     if model_info.mteb_score is None:
         with hf_runner(
             model_info.name,
+            revision=model_info.revision,
             is_sentence_transformer=True,
             dtype=ci_envs.VLLM_CI_HF_DTYPE or model_info.hf_dtype,
         ) as hf_model:
diff --git a/tests/models/language/pooling_mteb_test/mteb_score_utils.py b/tests/models/language/pooling_mteb_test/mteb_score_utils.py
index 621aff0e9..16081cbe2 100644
--- a/tests/models/language/pooling_mteb_test/mteb_score_utils.py
+++ b/tests/models/language/pooling_mteb_test/mteb_score_utils.py
@@ -241,6 +241,7 @@ def mteb_test_rerank_models(
 
     with vllm_runner(
         model_info.name,
+        revision=model_info.revision,
         runner="pooling",
         max_model_len=None,
         max_num_seqs=8,
@@ -286,7 +287,9 @@ def mteb_test_rerank_models(
     # Accelerate mteb test by setting
     # SentenceTransformers mteb score to a constant
     if model_info.mteb_score is None:
-        with hf_runner(model_info.name, dtype=model_info.hf_dtype) as hf_model:
+        with hf_runner(
+            model_info.name, revision=model_info.revision, dtype=model_info.hf_dtype
+        ) as hf_model:
             hf_model.chat_template = chat_template
             st_main_score = run_mteb_rerank(
                 hf_model,
diff --git a/tests/models/language/pooling_mteb_test/test_nomic.py b/tests/models/language/pooling_mteb_test/test_nomic.py
index fa987fab7..d7947f73f 100644
--- a/tests/models/language/pooling_mteb_test/test_nomic.py
+++ b/tests/models/language/pooling_mteb_test/test_nomic.py
@@ -12,6 +12,10 @@ MODELS = [
     EmbedModelInfo(
         "nomic-ai/nomic-embed-text-v1",
         architecture="NomicBertModel",
+        # Fixme:
+        # Update nomic-embed code to support the latest
+        # HF version and remove revision set.
+        revision="720244025c1a7e15661a174c63cce63c8218e52b",
         mteb_score=0.737568559,
         enable_test=True,
         seq_pooling_type="MEAN",
diff --git a/tests/models/utils.py b/tests/models/utils.py
index 6d6636c96..3b94f34fa 100644
--- a/tests/models/utils.py
+++ b/tests/models/utils.py
@@ -375,6 +375,7 @@ def softmax(data):
 @dataclass
 class ModelInfo:
     name: str
+    revision: str | None = None
     architecture: str = ""
     dtype: str = "auto"
     max_model_len: int | None = None