[Bugfix][v1] Fix step pooler implementation and step pooling usage in v1 (#19956)

Signed-off-by: Isotr0py <2037008807@qq.com>
2025-06-24 02:38:06 +08:00
parent 68aaeb3749
commit 61f4fc5dc6
14 changed files with 164 additions and 40 deletions
--- a/tests/models/multimodal/pooling/test_dse_qwen2_vl.py
+++ b/tests/models/multimodal/pooling/test_dse_qwen2_vl.py
@@ -98,7 +98,7 @@ def _run_test(
                     max_model_len=8192) as vllm_model:
        tokenizer = vllm_model.model.get_tokenizer()
        texts = [
-            # this is necessary because vllm_model.encode will not apply any
+            # this is necessary because vllm_model.embed will not apply any
            # templating to the prompt, and therefore lacks an image_pad
            # token unless one is inserted beforehand (the (28,28) image
            # above is converted to an image pad token by the chat template).
@@ -109,7 +109,7 @@ def _run_test(
            # vllm will replace the pad token with the actual image,
            # which may be a placeholder image, later.
        ]
-        vllm_outputs = vllm_model.encode(texts, images=input_images)
+        vllm_outputs = vllm_model.embed(texts, images=input_images)

    hf_outputs = []
    with hf_runner(model,