[DeepSeek] Fix DeepSeek V3.2 Rope Embedding (#28968)

Signed-off-by: Yongye Zhu <zyy1102000@gmail.com>
Yongye Zhu
2025-11-19 16:30:04 -05:00
committed by GitHub
parent 613abb50d5
commit 88f5b19f0b
2 changed files with 17 additions and 3 deletions


@@ -837,8 +837,8 @@ class Indexer(nn.Module):
         )
         q_pe, k_pe = rotary_emb(positions, q_pe, k_pe.unsqueeze(1))
-        q = torch.cat([q_pe, q_nope], dim=-1)
-        k = torch.cat([k_pe.squeeze(1), k_nope], dim=-1)
+        q = torch.cat([q_pe.squeeze(0), q_nope], dim=-1)
+        k = torch.cat([k_pe.squeeze((0, 2)), k_nope], dim=-1)
         # we only quant q here since k quant is fused with cache insertion
         q = q.view(-1, self.head_dim)
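
The squeezes added above drop the size-1 dims that `rotary_emb` leaves on its outputs before they are concatenated with the no-rope halves. A minimal shape sketch, assuming hypothetical sizes and the output layout implied by the diff (tuple-dim `Tensor.squeeze` needs PyTorch >= 2.0):

```python
import torch

# Hypothetical sizes; the real ones come from the model config.
num_tokens, rope_dim, nope_dim = 4, 64, 128

# Assumed post-rotary shapes implied by the diff: q_pe carries a leading
# size-1 dim, and k_pe keeps both that and the head dim unsqueezed earlier.
q_pe = torch.randn(1, num_tokens, rope_dim)
k_pe = torch.randn(1, num_tokens, 1, rope_dim)
q_nope = torch.randn(num_tokens, nope_dim)
k_nope = torch.randn(num_tokens, nope_dim)

q = torch.cat([q_pe.squeeze(0), q_nope], dim=-1)       # (num_tokens, rope+nope)
k = torch.cat([k_pe.squeeze((0, 2)), k_nope], dim=-1)  # (num_tokens, rope+nope)
assert q.shape == k.shape == (num_tokens, rope_dim + nope_dim)
```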
@@ -987,6 +987,14 @@ class DeepseekV2MLAAttention(nn.Module):
         self.is_v32 = hasattr(config, "index_topk")
         if self.is_v32:
+            self.indexer_rope_emb = get_rope(
+                qk_rope_head_dim,
+                rotary_dim=qk_rope_head_dim,
+                max_position=max_position_embeddings,
+                base=rope_theta,
+                rope_scaling=rope_scaling,
+                is_neox_style=True,
+            )
             self.indexer = Indexer(
                 vllm_config,
                 config,
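
This gives the V3.2 indexer a dedicated neox-style rotary embedding rather than reusing the attention layer's own rope, which is presumably built with different settings. A hedged usage sketch, assuming vLLM's `get_rope` helper with the keyword arguments shown in the diff and hypothetical config values:

```python
import torch
from vllm.model_executor.layers.rotary_embedding import get_rope

qk_rope_head_dim = 64       # hypothetical; supplied by the model config
indexer_rope_emb = get_rope(
    qk_rope_head_dim,
    rotary_dim=qk_rope_head_dim,
    max_position=4096,      # max_position_embeddings (hypothetical)
    base=10000,             # rope_theta (hypothetical)
    rope_scaling=None,      # hypothetical; the config may set YaRN-style scaling
    is_neox_style=True,
)

positions = torch.arange(4)
q_pe = torch.randn(4, 1, qk_rope_head_dim)  # (tokens, heads, head_dim)
k_pe = torch.randn(4, 1, qk_rope_head_dim)
q_pe, k_pe = indexer_rope_emb(positions, q_pe, k_pe)
```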
@@ -998,6 +1006,7 @@ class DeepseekV2MLAAttention(nn.Module):
                 f"{prefix}.indexer",
             )
         else:
+            self.indexer_rope_emb = None
             self.indexer = None
         mla_modules = MLAModules(
@@ -1015,6 +1024,7 @@ class DeepseekV2MLAAttention(nn.Module):
             q_b_proj=self.q_b_proj if self.q_lora_rank is not None else None,
             q_proj=self.q_proj if self.q_lora_rank is None else None,
             indexer=self.indexer,
+            indexer_rotary_emb=self.indexer_rope_emb,
             is_sparse=self.is_v32,
             topk_indices_buffer=topk_indices_buffer,
         )
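
For reference, the `is_v32` gate above keys off the config, so both the indexer and its rope stay `None` on pre-V3.2 checkpoints. A small sketch of that gating, using hypothetical stand-in configs:

```python
from types import SimpleNamespace

v32_cfg = SimpleNamespace(index_topk=2048)  # V3.2-style config (hypothetical value)
v3_cfg = SimpleNamespace()                  # pre-V3.2 config

# Mirrors `self.is_v32 = hasattr(config, "index_topk")` in the diff:
assert hasattr(v32_cfg, "index_topk")       # builds indexer_rope_emb + indexer
assert not hasattr(v3_cfg, "index_topk")    # both stay None, so MLAModules
                                            # receives indexer_rotary_emb=None
```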