[V1][Quantization] Add CUDA graph compatible v1 GGUF support (#18646)

Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
Signed-off-by: Isotr0py <2037008807@qq.com>
This commit is contained in:
Isotr0py
2025-05-27 12:40:28 +08:00
committed by GitHub
parent 1f88dbd2bb
commit 1f1b1bc03b
5 changed files with 188 additions and 59 deletions

View File

@@ -587,8 +587,6 @@ class MergedColumnParallelLinear(ColumnParallelLinear):
param.shard_id.append(loaded_shard_id)
param.shard_id_map[loaded_shard_id] = len(param.data_container)
param.data_container.append(loaded_weight)
-if len(param.data_container) == 2:
-    self.qweight = param.materialize_nested()
return
param_data = param.data
@@ -982,8 +980,6 @@ class QKVParallelLinear(ColumnParallelLinear):
param.shard_id.append(loaded_shard_id)
param.shard_id_map[loaded_shard_id] = len(param.data_container)
param.data_container.append(loaded_weight)
-if len(param.data_container) == 3:
-    self.qweight = param.materialize_nested()
return
param_data = param.data