[Bugfix][v1] Fix step pooler implementation and step pooling usage in v1 (#19956)
Signed-off-by: Isotr0py <2037008807@qq.com>
This commit is contained in:
@@ -46,7 +46,7 @@ def _run_test(
|
||||
# will hurt multiprocessing backend with fork method (the default method).
|
||||
with vllm_runner(model, task="embed", dtype=dtype,
|
||||
enforce_eager=True) as vllm_model:
|
||||
vllm_outputs = vllm_model.encode(input_texts, images=input_images)
|
||||
vllm_outputs = vllm_model.embed(input_texts, images=input_images)
|
||||
|
||||
# use eager mode for hf runner, since phi3_v didn't work with flash_attn
|
||||
hf_model_kwargs = {"_attn_implementation": "eager"}
|
||||
|
||||
Reference in New Issue
Block a user