[Bugfix] Fix tensor parallel issue in Qwen3 reranker weight loading (#20682)
Signed-off-by: Isotr0py <2037008807@qq.com>
Co-authored-by: Isotr0py <2037008807@qq.com>
This commit is contained in:
@@ -322,6 +322,8 @@ def load_weights_using_from_2_way_softmax(
|
||||
# refer to https://huggingface.co/Qwen/Qwen3-Reranker-0.6B/discussions/3
|
||||
from vllm.model_executor.layers.vocab_parallel_embedding import (
|
||||
ParallelLMHead)
|
||||
+from vllm.model_executor.model_loader.weight_utils import (
|
||||
+default_weight_loader)
|
||||
from vllm.model_executor.models.utils import AutoWeightsLoader
|
||||
|
||||
model_config = model.vllm_config.model_config
|
||||
@@ -329,8 +331,6 @@ def load_weights_using_from_2_way_softmax(
|
||||
tokens = cast(list[int], tokens)
|
||||
assert len(tokens) == 2
|
||||
|
||||
-device = model.score.weight.device
|
||||
|
||||
if model.config.tie_word_embeddings:
|
||||
model.lm_head = model.model.embed_tokens
|
||||
else:
|
||||
@@ -349,10 +349,13 @@ def load_weights_using_from_2_way_softmax(
|
||||
|
||||
false_id = tokenizer.convert_tokens_to_ids(tokens[0])
|
||||
true_id = tokenizer.convert_tokens_to_ids(tokens[1])
|
||||
-weight = model.lm_head.weight.data[true_id].to(device).to(
|
||||
-torch.float32) - model.lm_head.weight.data[false_id].to(device).to(
|
||||
+weight = model.lm_head.weight.data[[true_id]].to(
|
||||
+torch.float32) - model.lm_head.weight.data[[false_id]].to(
|
||||
torch.float32)
|
||||
-model.score.weight.data.copy_(weight)
|
||||
|
||||
+param = model.score.weight
|
||||
+weight_loader = getattr(param, "weight_loader", default_weight_loader)
|
||||
+weight_loader(param, weight)
|
||||
|
||||
del model.lm_head
|
||||
loaded_weights.add("score.weight")
|
||||
|
||||
Reference in New Issue
Block a user