From 98e1a43af7f1f83d978b1f39a5e7ebda093b56c1 Mon Sep 17 00:00:00 2001 From: kkyyxhll <110216194+kkyyxhll@users.noreply.github.com> Date: Tue, 7 Apr 2026 23:16:26 +0800 Subject: [PATCH] [Bugfix][Quantization] Fix PerTensorScale loading with tuple shard_id in MergedColumnParallelLinear (#38517) Signed-off-by: loukang --- vllm/model_executor/layers/linear.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/vllm/model_executor/layers/linear.py b/vllm/model_executor/layers/linear.py index 07dc2cb7f..975fedabd 100644 --- a/vllm/model_executor/layers/linear.py +++ b/vllm/model_executor/layers/linear.py @@ -910,7 +910,15 @@ class MergedColumnParallelLinear(ColumnParallelLinear): self.validate_shard_id(loaded_shard_id) if loaded_shard_id is None or isinstance(loaded_shard_id, tuple): if isinstance(param, PerTensorScaleParameter): - param.load_merged_column_weight(loaded_weight=loaded_weight, shard_id=0) + if isinstance(loaded_shard_id, tuple): + for idx in loaded_shard_id: + param.load_merged_column_weight( + loaded_weight=loaded_weight, shard_id=idx + ) + else: + param.load_merged_column_weight( + loaded_weight=loaded_weight, shard_id=0 + ) return elif type(param) in (RowvLLMParameter, BasevLLMParameter): param.load_merged_column_weight(loaded_weight=loaded_weight)