From 98e1a43af7f1f83d978b1f39a5e7ebda093b56c1 Mon Sep 17 00:00:00 2001
From: kkyyxhll <110216194+kkyyxhll@users.noreply.github.com>
Date: Tue, 7 Apr 2026 23:16:26 +0800
Subject: [PATCH] [Bugfix][Quantization] Fix PerTensorScale loading with tuple
 shard_id in MergedColumnParallelLinear (#38517)

Signed-off-by: loukang <loukang@xiaohongshu.com>
---
 vllm/model_executor/layers/linear.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/vllm/model_executor/layers/linear.py b/vllm/model_executor/layers/linear.py
index 07dc2cb7f..975fedabd 100644
--- a/vllm/model_executor/layers/linear.py
+++ b/vllm/model_executor/layers/linear.py
@@ -910,7 +910,15 @@ class MergedColumnParallelLinear(ColumnParallelLinear):
         self.validate_shard_id(loaded_shard_id)
         if loaded_shard_id is None or isinstance(loaded_shard_id, tuple):
             if isinstance(param, PerTensorScaleParameter):
-                param.load_merged_column_weight(loaded_weight=loaded_weight, shard_id=0)
+                if isinstance(loaded_shard_id, tuple):
+                    for idx in loaded_shard_id:
+                        param.load_merged_column_weight(
+                            loaded_weight=loaded_weight, shard_id=idx
+                        )
+                else:
+                    param.load_merged_column_weight(
+                        loaded_weight=loaded_weight, shard_id=0
+                    )
                 return
             elif type(param) in (RowvLLMParameter, BasevLLMParameter):
                 param.load_merged_column_weight(loaded_weight=loaded_weight)