[Bugfix] Use ReplicatedLinear for SequenceClassification head (#23836)

Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
Isotr0py
2025-08-29 12:41:20 +08:00
committed by GitHub
parent c8b3b299c9
commit 98ac0cb32d
2 changed files with 4 additions and 8 deletions

View File

@@ -248,7 +248,7 @@ def as_seq_cls_model(cls: _T) -> _T:
 return cls
 # Lazy import
-from vllm.model_executor.layers.linear import RowParallelLinear
+from vllm.model_executor.layers.linear import ReplicatedLinear
 from vllm.model_executor.layers.pooler import (ClassifierPooler,
 DispatchPooler, Pooler,
 PoolingMethod, PoolingType)
@@ -264,10 +264,9 @@ def as_seq_cls_model(cls: _T) -> _T:
 config = vllm_config.model_config.hf_config
 quant_config = vllm_config.quant_config
-self.score = RowParallelLinear(
+self.score = ReplicatedLinear(
 config.hidden_size,
 config.num_labels,
-input_is_parallel=False,
 bias=False,
 params_dtype=torch.float32,
 quant_config=quant_config,