[Bugfix][v1] Fix step pooler implementation and step pooling usage in v1 (#19956)

Signed-off-by: Isotr0py <2037008807@qq.com>
This commit is contained in:
Isotr0py
2025-06-24 02:38:06 +08:00
committed by GitHub
parent 68aaeb3749
commit 61f4fc5dc6
14 changed files with 164 additions and 40 deletions

View File

@@ -46,7 +46,7 @@ def _run_test(
# will hurt multiprocessing backend with fork method (the default method).
with vllm_runner(model, task="embed", dtype=dtype,
enforce_eager=True) as vllm_model:
- vllm_outputs = vllm_model.encode(input_texts, images=input_images)
+ vllm_outputs = vllm_model.embed(input_texts, images=input_images)
# use eager mode for hf runner, since phi3_v didn't work with flash_attn
hf_model_kwargs = {"_attn_implementation": "eager"}