[Model][2/N] Improve all pooling task | Support multi-vector retrieval (#25370)

Signed-off-by: wang.yuqi <noooop@126.com>
2025-10-15 19:14:41 +08:00
parent d4d1a6024f
commit f54f85129e
41 changed files with 786 additions and 399 deletions
--- a/tests/models/language/pooling/test_pooler_config_init_behaviour.py
+++ b/tests/models/language/pooling/test_pooler_config_init_behaviour.py
@@ -93,7 +93,7 @@ def test_embed_models_using_normalize(
    ],
 )
@pytest.mark.parametrize("dtype", ["half"])
-def test_reward_models_using_softmax(
+def test_reward_models_using_activation(
    hf_runner,
    vllm_runner,
    example_prompts,
@@ -104,22 +104,64 @@ def test_reward_models_using_softmax(
        model,
        max_model_len=1024,
        dtype=dtype,
-        pooler_config=PoolerConfig(softmax=False),
+        pooler_config=PoolerConfig(activation=False),
    ) as vllm_model:
-        wo_softmax = vllm_model.encode(example_prompts)
+        wo_activation = vllm_model.reward(example_prompts)

    with vllm_runner(
-        model, max_model_len=1024, dtype=dtype, pooler_config=PoolerConfig(softmax=True)
+        model,
+        max_model_len=1024,
+        dtype=dtype,
+        pooler_config=PoolerConfig(activation=True),
    ) as vllm_model:
-        w_softmax = vllm_model.encode(example_prompts)
+        w_activation = vllm_model.reward(example_prompts)

-    for wo, w in zip(wo_softmax, w_softmax):
+    for wo, w in zip(wo_activation, w_activation):
        wo = torch.tensor(wo)
        w = torch.tensor(w)

        assert not torch.allclose(wo, w, atol=1e-2), (
-            "pooler_config softmax is not working"
+            "pooler_config activation is not working"
        )
        assert torch.allclose(softmax(wo), w, atol=1e-2), (
-            "w_softmax should be close to softmax(wo_softmax)."
+            "w_activation should be close to activation(wo_activation)."
+        )
+
+
+@pytest.mark.parametrize(
+    "model",
+    [
+        "intfloat/multilingual-e5-small",
+    ],
+)
+@pytest.mark.parametrize("dtype", ["half"])
+def test_multi_vector_retrieval_models_using_normalize(
+    hf_runner,  # requested but not referenced in this test body
+    vllm_runner,
+    example_prompts,
+    model: str,
+    dtype: str,
+) -> None:  # checks that PoolerConfig(normalize=...) changes token_embed output
+    with vllm_runner(
+        model,
+        max_model_len=512,
+        dtype=dtype,
+        pooler_config=PoolerConfig(normalize=False),  # run 1: normalize off
+    ) as vllm_model:
+        wo_normalize = vllm_model.token_embed(example_prompts)
+
+    with vllm_runner(
+        model,
+        max_model_len=512,
+        dtype=dtype,
+        pooler_config=PoolerConfig(normalize=True),  # run 2: normalize on
+    ) as vllm_model:
+        w_normalize = vllm_model.token_embed(example_prompts)
+
+    for wo, w in zip(wo_normalize, w_normalize):  # pair the two runs' outputs
+        assert not torch.allclose(wo, w, atol=1e-2), (  # the two runs must differ
+            "pooler_config normalize is not working"
+        )
+        assert torch.allclose(F.normalize(wo, p=2, dim=-1), w, atol=1e-2), (
+            "w_normal should be close to normal(wo_normal)."  # w ~= p=2 normalized wo
        )