[Bugfix] Fix Qwen3-VL-Reranker load. (#33298)
Signed-off-by: wang.yuqi <yuqi.wang@daocloud.io>
Signed-off-by: wang.yuqi <noooop@126.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
(cherry picked from commit abb34ac43a)
This commit is contained in:
@@ -18,48 +18,32 @@ e.g.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import base64
|
import pprint
|
||||||
import json
|
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
from vllm.multimodal.utils import encode_image_url, fetch_image
|
||||||
def encode_base64_content_from_url(content_url: str) -> dict[str, str]:
|
|
||||||
"""Encode a content retrieved from a remote url to base64 format."""
|
|
||||||
|
|
||||||
with requests.get(content_url, headers=headers) as response:
|
|
||||||
response.raise_for_status()
|
|
||||||
result = base64.b64encode(response.content).decode("utf-8")
|
|
||||||
|
|
||||||
return {"url": f"data:image/jpeg;base64,{result}"}
|
|
||||||
|
|
||||||
|
|
||||||
headers = {"accept": "application/json", "Content-Type": "application/json"}
|
|
||||||
|
|
||||||
query = "A woman playing with her dog on a beach at sunset."
|
query = "A woman playing with her dog on a beach at sunset."
|
||||||
documents = {
|
document = (
|
||||||
"content": [
|
"A woman shares a joyful moment with her golden retriever on a sun-drenched beach at sunset, "
|
||||||
{
|
"as the dog offers its paw in a heartwarming display of companionship and trust."
|
||||||
"type": "text",
|
)
|
||||||
"text": (
|
image_url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg"
|
||||||
"A woman shares a joyful moment with her golden retriever on a sun-drenched beach at sunset, "
|
documents = [
|
||||||
"as the dog offers its paw in a heartwarming display of companionship and trust."
|
{
|
||||||
),
|
"type": "text",
|
||||||
},
|
"text": document,
|
||||||
{
|
},
|
||||||
"type": "image_url",
|
{
|
||||||
"image_url": {
|
"type": "image_url",
|
||||||
"url": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg"
|
"image_url": {"url": image_url},
|
||||||
},
|
},
|
||||||
},
|
{
|
||||||
{
|
"type": "image_url",
|
||||||
"type": "image_url",
|
"image_url": {"url": encode_image_url(fetch_image(image_url))},
|
||||||
"image_url": encode_base64_content_from_url(
|
},
|
||||||
"https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg"
|
]
|
||||||
),
|
|
||||||
},
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def parse_args():
|
def parse_args():
|
||||||
@@ -74,23 +58,36 @@ def main(args):
|
|||||||
models_url = base_url + "/v1/models"
|
models_url = base_url + "/v1/models"
|
||||||
rerank_url = base_url + "/rerank"
|
rerank_url = base_url + "/rerank"
|
||||||
|
|
||||||
response = requests.get(models_url, headers=headers)
|
response = requests.get(models_url)
|
||||||
model = response.json()["data"][0]["id"]
|
model = response.json()["data"][0]["id"]
|
||||||
|
|
||||||
data = {
|
print("Query: string & Document: list of string")
|
||||||
|
prompt = {"model": model, "query": query, "documents": [document]}
|
||||||
|
response = requests.post(rerank_url, json=prompt)
|
||||||
|
pprint.pprint(response.json())
|
||||||
|
|
||||||
|
print("Query: string & Document: text")
|
||||||
|
prompt = {"model": model, "query": query, "documents": {"content": [documents[0]]}}
|
||||||
|
response = requests.post(rerank_url, json=prompt)
|
||||||
|
pprint.pprint(response.json())
|
||||||
|
|
||||||
|
print("Query: string & Document: image url")
|
||||||
|
prompt = {
|
||||||
"model": model,
|
"model": model,
|
||||||
"query": query,
|
"query": query,
|
||||||
"documents": documents,
|
"documents": {"content": [documents[1]]},
|
||||||
}
|
}
|
||||||
response = requests.post(rerank_url, headers=headers, json=data)
|
response = requests.post(rerank_url, json=prompt)
|
||||||
|
pprint.pprint(response.json())
|
||||||
|
|
||||||
# Check the response
|
print("Query: string & Document: image base64")
|
||||||
if response.status_code == 200:
|
prompt = {
|
||||||
print("Request successful!")
|
"model": model,
|
||||||
print(json.dumps(response.json(), indent=2))
|
"query": query,
|
||||||
else:
|
"documents": {"content": [documents[2]]},
|
||||||
print(f"Request failed with status code: {response.status_code}")
|
}
|
||||||
print(response.text)
|
response = requests.post(rerank_url, json=prompt)
|
||||||
|
pprint.pprint(response.json())
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -17,48 +17,32 @@ e.g.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import base64
|
|
||||||
import json
|
|
||||||
import pprint
|
import pprint
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
from vllm.multimodal.utils import encode_image_url, fetch_image
|
||||||
|
|
||||||
def encode_base64_content_from_url(content_url: str) -> dict[str, str]:
|
query = "A woman playing with her dog on a beach at sunset."
|
||||||
"""Encode a content retrieved from a remote url to base64 format."""
|
document = (
|
||||||
|
"A woman shares a joyful moment with her golden retriever on a sun-drenched beach at sunset, "
|
||||||
with requests.get(content_url, headers=headers) as response:
|
"as the dog offers its paw in a heartwarming display of companionship and trust."
|
||||||
response.raise_for_status()
|
)
|
||||||
result = base64.b64encode(response.content).decode("utf-8")
|
image_url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg"
|
||||||
|
documents = [
|
||||||
return {"url": f"data:image/jpeg;base64,{result}"}
|
{
|
||||||
|
"type": "text",
|
||||||
|
"text": document,
|
||||||
headers = {"accept": "application/json", "Content-Type": "application/json"}
|
},
|
||||||
|
{
|
||||||
queries = "slm markdown"
|
"type": "image_url",
|
||||||
documents = {
|
"image_url": {"url": image_url},
|
||||||
"content": [
|
},
|
||||||
{
|
{
|
||||||
"type": "image_url",
|
"type": "image_url",
|
||||||
"image_url": {
|
"image_url": {"url": encode_image_url(fetch_image(image_url))},
|
||||||
"url": "https://raw.githubusercontent.com/jina-ai/multimodal-reranker-test/main/handelsblatt-preview.png"
|
},
|
||||||
},
|
]
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "image_url",
|
|
||||||
"image_url": {
|
|
||||||
"url": "https://raw.githubusercontent.com/jina-ai/multimodal-reranker-test/main/paper-11.png"
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "image_url",
|
|
||||||
"image_url": encode_base64_content_from_url(
|
|
||||||
"https://raw.githubusercontent.com/jina-ai/multimodal-reranker-test/main/paper-11.png"
|
|
||||||
),
|
|
||||||
},
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def parse_args():
|
def parse_args():
|
||||||
@@ -73,15 +57,40 @@ def main(args):
|
|||||||
models_url = base_url + "/v1/models"
|
models_url = base_url + "/v1/models"
|
||||||
score_url = base_url + "/score"
|
score_url = base_url + "/score"
|
||||||
|
|
||||||
response = requests.get(models_url, headers=headers)
|
response = requests.get(models_url)
|
||||||
model = response.json()["data"][0]["id"]
|
model = response.json()["data"][0]["id"]
|
||||||
|
|
||||||
prompt = {"model": model, "queries": queries, "documents": documents}
|
print("Query: string & Document: string")
|
||||||
response = requests.post(score_url, headers=headers, json=prompt)
|
prompt = {"model": model, "queries": query, "documents": document}
|
||||||
print("\nPrompt when queries is string and documents is a image list:")
|
response = requests.post(score_url, json=prompt)
|
||||||
pprint.pprint(prompt)
|
pprint.pprint(response.json())
|
||||||
print("\nScore Response:")
|
|
||||||
print(json.dumps(response.json(), indent=2))
|
print("Query: string & Document: text")
|
||||||
|
prompt = {
|
||||||
|
"model": model,
|
||||||
|
"queries": query,
|
||||||
|
"documents": {"content": [documents[0]]},
|
||||||
|
}
|
||||||
|
response = requests.post(score_url, json=prompt)
|
||||||
|
pprint.pprint(response.json())
|
||||||
|
|
||||||
|
print("Query: string & Document: image url")
|
||||||
|
prompt = {
|
||||||
|
"model": model,
|
||||||
|
"queries": query,
|
||||||
|
"documents": {"content": [documents[1]]},
|
||||||
|
}
|
||||||
|
response = requests.post(score_url, json=prompt)
|
||||||
|
pprint.pprint(response.json())
|
||||||
|
|
||||||
|
print("Query: string & Document: image base64")
|
||||||
|
prompt = {
|
||||||
|
"model": model,
|
||||||
|
"queries": query,
|
||||||
|
"documents": {"content": [documents[2]]},
|
||||||
|
}
|
||||||
|
response = requests.post(score_url, json=prompt)
|
||||||
|
pprint.pprint(response.json())
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -5,9 +5,9 @@ import json
|
|||||||
import pytest
|
import pytest
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from tests.entrypoints.test_utils import encode_base64_content_from_url
|
|
||||||
from tests.utils import RemoteOpenAIServer
|
from tests.utils import RemoteOpenAIServer
|
||||||
from vllm.entrypoints.pooling.classify.protocol import ClassificationResponse
|
from vllm.entrypoints.pooling.classify.protocol import ClassificationResponse
|
||||||
|
from vllm.multimodal.utils import encode_image_url, fetch_image
|
||||||
|
|
||||||
MODEL_NAME = "muziyongshixin/Qwen2.5-VL-7B-for-VideoCls"
|
MODEL_NAME = "muziyongshixin/Qwen2.5-VL-7B-for-VideoCls"
|
||||||
MAXIMUM_VIDEOS = 1
|
MAXIMUM_VIDEOS = 1
|
||||||
@@ -19,7 +19,7 @@ HF_OVERRIDES = {
|
|||||||
}
|
}
|
||||||
input_text = "This product was excellent and exceeded my expectations"
|
input_text = "This product was excellent and exceeded my expectations"
|
||||||
image_url = "https://vllm-public-assets.s3.us-west-2.amazonaws.com/multimodal_asset/cat_snow.jpg"
|
image_url = "https://vllm-public-assets.s3.us-west-2.amazonaws.com/multimodal_asset/cat_snow.jpg"
|
||||||
image_base64 = encode_base64_content_from_url(image_url)
|
image_base64 = {"url": encode_image_url(fetch_image(image_url))}
|
||||||
video_url = "https://www.bogotobogo.com/python/OpenCV_Python/images/mean_shift_tracking/slow_traffic_small.mp4"
|
video_url = "https://www.bogotobogo.com/python/OpenCV_Python/images/mean_shift_tracking/slow_traffic_small.mp4"
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
122
tests/entrypoints/pooling/score/test_online_score_vision.py
Normal file
122
tests/entrypoints/pooling/score/test_online_score_vision.py
Normal file
@@ -0,0 +1,122 @@
|
|||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from tests.utils import VLLM_PATH, RemoteOpenAIServer
|
||||||
|
from vllm.entrypoints.pooling.score.protocol import ScoreResponse
|
||||||
|
from vllm.multimodal.utils import encode_image_url, fetch_image
|
||||||
|
|
||||||
|
MODEL_NAME = "Qwen/Qwen3-VL-Reranker-2B"
|
||||||
|
HF_OVERRIDES = {
|
||||||
|
"architectures": ["Qwen3VLForSequenceClassification"],
|
||||||
|
"classifier_from_token": ["no", "yes"],
|
||||||
|
"is_original_qwen3_reranker": True,
|
||||||
|
}
|
||||||
|
|
||||||
|
query = "A cat standing in the snow."
|
||||||
|
image_url = "https://vllm-public-assets.s3.us-west-2.amazonaws.com/multimodal_asset/cat_snow.jpg"
|
||||||
|
documents = [
|
||||||
|
{
|
||||||
|
"type": "text",
|
||||||
|
"text": query,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "image_url",
|
||||||
|
"image_url": {"url": image_url},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "image_url",
|
||||||
|
"image_url": {"url": encode_image_url(fetch_image(image_url))},
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="module")
|
||||||
|
def server():
|
||||||
|
args = [
|
||||||
|
"--enforce-eager",
|
||||||
|
"--max-model-len",
|
||||||
|
"8192",
|
||||||
|
"--chat-template",
|
||||||
|
str(VLLM_PATH / "examples/pooling/score/template/qwen3_vl_reranker.jinja"),
|
||||||
|
]
|
||||||
|
|
||||||
|
with RemoteOpenAIServer(
|
||||||
|
MODEL_NAME, args, override_hf_configs=HF_OVERRIDES
|
||||||
|
) as remote_server:
|
||||||
|
yield remote_server
|
||||||
|
|
||||||
|
|
||||||
|
def test_score_api_queries_str_documents_str(server: RemoteOpenAIServer):
|
||||||
|
queries = "What is the capital of France?"
|
||||||
|
documents = "The capital of France is Paris."
|
||||||
|
|
||||||
|
score_response = requests.post(
|
||||||
|
server.url_for("score"),
|
||||||
|
json={
|
||||||
|
"model": MODEL_NAME,
|
||||||
|
"queries": queries,
|
||||||
|
"documents": documents,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
score_response.raise_for_status()
|
||||||
|
score = ScoreResponse.model_validate(score_response.json())
|
||||||
|
|
||||||
|
assert score.id is not None
|
||||||
|
assert score.data is not None
|
||||||
|
assert len(score.data) == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_score_api_queries_str_documents_text_content(server: RemoteOpenAIServer):
|
||||||
|
score_response = requests.post(
|
||||||
|
server.url_for("score"),
|
||||||
|
json={
|
||||||
|
"model": MODEL_NAME,
|
||||||
|
"queries": query,
|
||||||
|
"documents": {"content": [documents[0]]},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
score_response.raise_for_status()
|
||||||
|
score = ScoreResponse.model_validate(score_response.json())
|
||||||
|
|
||||||
|
assert score.id is not None
|
||||||
|
assert score.data is not None
|
||||||
|
assert len(score.data) == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_score_api_queries_str_documents_image_url_content(server: RemoteOpenAIServer):
|
||||||
|
score_response = requests.post(
|
||||||
|
server.url_for("score"),
|
||||||
|
json={
|
||||||
|
"model": MODEL_NAME,
|
||||||
|
"queries": query,
|
||||||
|
"documents": {"content": [documents[1]]},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
score_response.raise_for_status()
|
||||||
|
score = ScoreResponse.model_validate(score_response.json())
|
||||||
|
|
||||||
|
assert score.id is not None
|
||||||
|
assert score.data is not None
|
||||||
|
assert len(score.data) == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_score_api_queries_str_documents_image_base64_content(
|
||||||
|
server: RemoteOpenAIServer,
|
||||||
|
):
|
||||||
|
score_response = requests.post(
|
||||||
|
server.url_for("score"),
|
||||||
|
json={
|
||||||
|
"model": MODEL_NAME,
|
||||||
|
"queries": query,
|
||||||
|
"documents": {"content": [documents[2]]},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
score_response.raise_for_status()
|
||||||
|
score = ScoreResponse.model_validate(score_response.json())
|
||||||
|
|
||||||
|
assert score.id is not None
|
||||||
|
assert score.data is not None
|
||||||
|
assert len(score.data) == 1
|
||||||
@@ -1,9 +1,5 @@
|
|||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||||
import base64
|
|
||||||
|
|
||||||
import requests
|
|
||||||
|
|
||||||
from vllm.entrypoints.utils import sanitize_message
|
from vllm.entrypoints.utils import sanitize_message
|
||||||
|
|
||||||
|
|
||||||
@@ -12,11 +8,3 @@ def test_sanitize_message():
|
|||||||
sanitize_message("<_io.BytesIO object at 0x7a95e299e750>")
|
sanitize_message("<_io.BytesIO object at 0x7a95e299e750>")
|
||||||
== "<_io.BytesIO object>"
|
== "<_io.BytesIO object>"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def encode_base64_content_from_url(content_url: str) -> dict[str, str]:
|
|
||||||
with requests.get(content_url) as response:
|
|
||||||
response.raise_for_status()
|
|
||||||
result = base64.b64encode(response.content).decode("utf-8")
|
|
||||||
|
|
||||||
return {"url": f"data:image/jpeg;base64,{result}"}
|
|
||||||
|
|||||||
@@ -466,6 +466,7 @@ def load_weights_using_from_2_way_softmax(
|
|||||||
|
|
||||||
language_model = _get_language_model_for_seq_cls(model)
|
language_model = _get_language_model_for_seq_cls(model)
|
||||||
is_vlm = language_model is not model
|
is_vlm = language_model is not model
|
||||||
|
using_vlm_head = is_vlm and hasattr(language_model, "score")
|
||||||
|
|
||||||
language_model.lm_head = ParallelLMHead(
|
language_model.lm_head = ParallelLMHead(
|
||||||
text_config.vocab_size, text_config.hidden_size, quant_config=quant_config
|
text_config.vocab_size, text_config.hidden_size, quant_config=quant_config
|
||||||
@@ -506,14 +507,16 @@ def load_weights_using_from_2_way_softmax(
|
|||||||
torch.float32
|
torch.float32
|
||||||
) - lm_head_weight.data[[false_id]].to(torch.float32)
|
) - lm_head_weight.data[[false_id]].to(torch.float32)
|
||||||
|
|
||||||
score_layer = language_model.score if is_vlm else model.score
|
score_layer = language_model.score if using_vlm_head else model.score
|
||||||
param = score_layer.weight
|
param = score_layer.weight
|
||||||
weight_loader = getattr(param, "weight_loader", default_weight_loader)
|
weight_loader = getattr(param, "weight_loader", default_weight_loader)
|
||||||
weight_loader(param, score_weight)
|
weight_loader(param, score_weight)
|
||||||
|
|
||||||
del language_model.lm_head
|
del language_model.lm_head
|
||||||
|
|
||||||
score_weight_name = "language_model.score.weight" if is_vlm else "score.weight"
|
score_weight_name = (
|
||||||
|
"language_model.score.weight" if using_vlm_head else "score.weight"
|
||||||
|
)
|
||||||
loaded_weights.add(score_weight_name)
|
loaded_weights.add(score_weight_name)
|
||||||
|
|
||||||
lm_head_name = "lm_head.weight"
|
lm_head_name = "lm_head.weight"
|
||||||
@@ -537,6 +540,7 @@ def load_weights_no_post_processing(model, weights: Iterable[tuple[str, torch.Te
|
|||||||
|
|
||||||
language_model = _get_language_model_for_seq_cls(model)
|
language_model = _get_language_model_for_seq_cls(model)
|
||||||
is_vlm = language_model is not model
|
is_vlm = language_model is not model
|
||||||
|
using_vlm_head = is_vlm and hasattr(language_model, "score")
|
||||||
|
|
||||||
language_model.lm_head = ParallelLMHead(
|
language_model.lm_head = ParallelLMHead(
|
||||||
text_config.vocab_size, text_config.hidden_size, quant_config=quant_config
|
text_config.vocab_size, text_config.hidden_size, quant_config=quant_config
|
||||||
@@ -572,14 +576,16 @@ def load_weights_no_post_processing(model, weights: Iterable[tuple[str, torch.Te
|
|||||||
token_ids = [tokenizer.convert_tokens_to_ids(t) for t in tokens]
|
token_ids = [tokenizer.convert_tokens_to_ids(t) for t in tokens]
|
||||||
score_weight = language_model.lm_head.weight.data[token_ids]
|
score_weight = language_model.lm_head.weight.data[token_ids]
|
||||||
|
|
||||||
score_layer = language_model.score if is_vlm else model.score
|
score_layer = language_model.score if using_vlm_head else model.score
|
||||||
param = score_layer.weight
|
param = score_layer.weight
|
||||||
weight_loader = getattr(param, "weight_loader", default_weight_loader)
|
weight_loader = getattr(param, "weight_loader", default_weight_loader)
|
||||||
weight_loader(param, score_weight)
|
weight_loader(param, score_weight)
|
||||||
|
|
||||||
del language_model.lm_head
|
del language_model.lm_head
|
||||||
|
|
||||||
score_weight_name = "language_model.score.weight" if is_vlm else "score.weight"
|
score_weight_name = (
|
||||||
|
"language_model.score.weight" if using_vlm_head else "score.weight"
|
||||||
|
)
|
||||||
loaded_weights.add(score_weight_name)
|
loaded_weights.add(score_weight_name)
|
||||||
|
|
||||||
lm_head_name = "lm_head.weight"
|
lm_head_name = "lm_head.weight"
|
||||||
|
|||||||
Reference in New Issue
Block a user