diff --git a/tests/models/multimodal/pooling/test_colmodernvbert.py b/tests/models/multimodal/pooling/test_colmodernvbert.py index efeb3195b..6e9dce7ab 100644 --- a/tests/models/multimodal/pooling/test_colmodernvbert.py +++ b/tests/models/multimodal/pooling/test_colmodernvbert.py @@ -15,6 +15,10 @@ from vllm.entrypoints.pooling.scoring.utils import compute_maxsim_score MODEL_NAME = "ModernVBERT/colmodernvbert-merged" COLBERT_DIM = 128 DTYPE = "half" +# FIXME: +# Update the colmodernvbert code to support the latest HF version, +# then remove this pinned revision. +REVISION = "4a0a9f3ac7a7992fec410bfa8e3d080ac9a5bcee" # ----------------------------------------------------------------------- @@ -26,6 +30,7 @@ def test_colmodernvbert_text_token_embed(vllm_runner): """Text query produces per-token embeddings with shape (seq_len, 128).""" with vllm_runner( MODEL_NAME, + revision=REVISION, runner="pooling", dtype=DTYPE, enforce_eager=True, @@ -49,6 +54,7 @@ def test_colmodernvbert_text_relevance_ordering(vllm_runner): with vllm_runner( MODEL_NAME, + revision=REVISION, runner="pooling", dtype=DTYPE, enforce_eager=True, @@ -66,6 +72,7 @@ def test_colmodernvbert_text_late_interaction(vllm_runner): with vllm_runner( MODEL_NAME, + revision=REVISION, runner="pooling", dtype=DTYPE, enforce_eager=True, @@ -92,6 +99,7 @@ def test_colmodernvbert_image_token_embed(vllm_runner, image_assets): """Image input produces per-token embeddings including vision tokens.""" with vllm_runner( MODEL_NAME, + revision=REVISION, runner="pooling", dtype=DTYPE, enforce_eager=True, diff --git a/tests/models/registry.py b/tests/models/registry.py index 263012e49..98c2a0410 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -636,6 +636,7 @@ _LATE_INTERACTION_EXAMPLE_MODELS = { # [Multimodal] "ColModernVBertForRetrieval": _HfExamplesInfo( "ModernVBERT/colmodernvbert-merged", + revision="4a0a9f3ac7a7992fec410bfa8e3d080ac9a5bcee", ), "ColPaliForRetrieval": _HfExamplesInfo("vidore/colpali-v1.3-hf"), 
"ColQwen3": _HfExamplesInfo(