[Bugfix] Fix tensor parallel issue in Qwen3 reranker weight loading (#20682)
Signed-off-by: Isotr0py <2037008807@qq.com> Co-authored-by: Isotr0py <2037008807@qq.com>
This commit is contained in:
@@ -268,7 +268,8 @@ def mteb_test_rerank_models(hf_runner,
|
||||
model_info: RerankModelInfo,
|
||||
vllm_extra_kwargs=None,
|
||||
hf_model_callback=None,
|
||||
vllm_mteb_encoder=VllmMtebEncoder):
|
||||
vllm_mteb_encoder=VllmMtebEncoder,
|
||||
atol=MTEB_RERANK_TOL):
|
||||
if not model_info.enable_test:
|
||||
# A model family has many models with the same architecture,
|
||||
# and we don't need to test each one.
|
||||
@@ -301,4 +302,4 @@ def mteb_test_rerank_models(hf_runner,
|
||||
print("SentenceTransformers:", st_dtype, st_main_score)
|
||||
print("Difference:", st_main_score - vllm_main_score)
|
||||
|
||||
assert st_main_score == pytest.approx(vllm_main_score, abs=MTEB_RERANK_TOL)
|
||||
assert st_main_score == pytest.approx(vllm_main_score, abs=atol)
|
||||
|
||||
@@ -6,6 +6,7 @@ import pytest
|
||||
import torch
|
||||
|
||||
from tests.conftest import HfRunner
|
||||
from tests.utils import multi_gpu_test
|
||||
|
||||
from .mteb_utils import RerankModelInfo, mteb_test_rerank_models
|
||||
|
||||
@@ -87,3 +88,29 @@ def test_rerank_models_mteb(vllm_runner, model_info: RerankModelInfo) -> None:
|
||||
|
||||
mteb_test_rerank_models(Qwen3RerankerHfRunner, vllm_runner, model_info,
|
||||
vllm_extra_kwargs)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model_info", RERANK_MODELS)
|
||||
@multi_gpu_test(num_gpus=2)
|
||||
def test_rerank_models_mteb_tp(vllm_runner,
|
||||
model_info: RerankModelInfo) -> None:
|
||||
|
||||
assert model_info.architecture == "Qwen3ForSequenceClassification"
|
||||
|
||||
vllm_extra_kwargs: dict[str, Any] = {
|
||||
"hf_overrides": {
|
||||
"architectures": ["Qwen3ForSequenceClassification"],
|
||||
"classifier_from_token": ["no", "yes"],
|
||||
"is_original_qwen3_reranker": True,
|
||||
},
|
||||
"tensor_parallel_size": 2,
|
||||
}
|
||||
|
||||
if model_info.name == "Qwen/Qwen3-Reranker-4B":
|
||||
vllm_extra_kwargs["max_num_seqs"] = 1
|
||||
|
||||
mteb_test_rerank_models(Qwen3RerankerHfRunner,
|
||||
vllm_runner,
|
||||
model_info,
|
||||
vllm_extra_kwargs,
|
||||
atol=1.2e-2)
|
||||
|
||||
Reference in New Issue
Block a user