[Bugfix] Support other quantization methods in glm41v (#36321)
Signed-off-by: g00887675/loganJane <g00887675/loganJane73@hotmail.com>
Co-authored-by: g00887675/loganJane <g00887675/loganJane73@hotmail.com>
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
@@ -63,6 +63,9 @@ from vllm.model_executor.layers.linear import (
|
|||||||
RowParallelLinear,
|
RowParallelLinear,
|
||||||
)
|
)
|
||||||
from vllm.model_executor.layers.quantization import QuantizationConfig
|
from vllm.model_executor.layers.quantization import QuantizationConfig
|
||||||
|
from vllm.model_executor.layers.quantization.compressed_tensors import (
|
||||||
|
compressed_tensors,
|
||||||
|
)
|
||||||
from vllm.model_executor.layers.rotary_embedding import get_rope
|
from vllm.model_executor.layers.rotary_embedding import get_rope
|
||||||
from vllm.model_executor.layers.rotary_embedding.common import (
|
from vllm.model_executor.layers.rotary_embedding.common import (
|
||||||
ApplyRotaryEmb,
|
ApplyRotaryEmb,
|
||||||
@@ -280,7 +283,9 @@ class Glm4vVisionAttention(nn.Module):
|
|||||||
bias=False,
|
bias=False,
|
||||||
quant_config=quant_config,
|
quant_config=quant_config,
|
||||||
# Change qkv prefix to align with GLM-4.5V-FP8 quantization cfg
|
# Change qkv prefix to align with GLM-4.5V-FP8 quantization cfg
|
||||||
prefix=f"{prefix}.qkv_proj" if quant_config else f"{prefix}.qkv",
|
prefix=f"{prefix}.qkv_proj"
|
||||||
|
if isinstance(quant_config, compressed_tensors.CompressedTensorsConfig)
|
||||||
|
else f"{prefix}.qkv",
|
||||||
disable_tp=use_data_parallel,
|
disable_tp=use_data_parallel,
|
||||||
)
|
)
|
||||||
self.proj = RowParallelLinear(
|
self.proj = RowParallelLinear(
|
||||||
|
|||||||
Reference in New Issue
Block a user