[Bugfix] Fix compressed-tensors quantization failure for DeepSeek-R1 on MI300x (#36247)
Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com>
This commit is contained in:
@@ -756,7 +756,7 @@ direct_register_custom_op(
 )
 
 
-class DeepSeekV2FusedQkvAProj(MergedColumnParallelLinear):
+class DeepSeekV2FusedQkvAProjLinear(MergedColumnParallelLinear):
     def __init__(
         self,
         input_size: int,
@@ -848,7 +848,7 @@ class DeepseekV2MLAAttention(nn.Module):
         self.max_position_embeddings = max_position_embeddings
 
         if self.q_lora_rank is not None:
-            self.fused_qkv_a_proj = DeepSeekV2FusedQkvAProj(
+            self.fused_qkv_a_proj = DeepSeekV2FusedQkvAProjLinear(
                 self.hidden_size,
                 [self.q_lora_rank, self.kv_lora_rank + self.qk_rope_head_dim],
                 quant_config=quant_config,
||||
Reference in New Issue
Block a user