Support encoder-only models without KV-Cache (#21270)

Signed-off-by: Max de Bayser <maxdebayser@gmail.com> Signed-off-by: Max de Bayser <mbayser@br.ibm.com> Co-authored-by: Russell Bryant <rbryant@redhat.com>
2025-07-26 10:09:52 -03:00
parent f27fdfc3ed
commit 1cd6eaba54
17 changed files with 352 additions and 99 deletions
--- a/tests/models/language/pooling/test_jina.py
+++ b/tests/models/language/pooling/test_jina.py
@@ -23,6 +23,14 @@ RERANK_MODELS = [
 ]


+@pytest.fixture(autouse=True)
+def v1(run_with_both_engines):
+    # Simple autouse wrapper to run both engines for each test
+    # This can be promoted up to conftest.py to run for every
+    # test in a package
+    pass
+
+
@pytest.mark.parametrize("model_info", EMBEDDING_MODELS)
 def test_embed_models_mteb(hf_runner, vllm_runner,
                           model_info: EmbedModelInfo) -> None: