Remove unused kwargs from model definitions (#13555)
This commit is contained in:
@@ -29,7 +29,7 @@ import torch
|
||||
from torch import nn
|
||||
from transformers import PretrainedConfig
|
||||
|
||||
-from vllm.attention import Attention, AttentionMetadata
|
||||
+from vllm.attention import Attention
|
||||
from vllm.config import CacheConfig, VllmConfig
|
||||
from vllm.distributed import get_tensor_model_parallel_world_size
|
||||
from vllm.model_executor.layers.layernorm import RMSNorm
|
||||
@@ -129,8 +129,6 @@ class MiniCPM3Attention(nn.Module):
|
||||
self,
|
||||
positions: torch.Tensor,
|
||||
hidden_states: torch.Tensor,
|
||||
-kv_cache: torch.Tensor,
|
||||
-attn_metadata: AttentionMetadata,
|
||||
) -> torch.Tensor:
|
||||
q, _ = self.q_a_proj(hidden_states)
|
||||
q = self.q_a_layernorm(q)
|
||||
@@ -170,7 +168,7 @@ class MiniCPM3Attention(nn.Module):
|
||||
v, [0, self.qk_head_dim - self.v_head_dim],
|
||||
value=0).view(-1, self.num_local_heads * self.qk_head_dim)
|
||||
|
||||
-attn_output = self.attn(q, k, v, kv_cache, attn_metadata)
|
||||
+attn_output = self.attn(q, k, v)
|
||||
attn_output = attn_output.view(
|
||||
-1, self.num_local_heads,
|
||||
self.qk_head_dim)[..., :self.v_head_dim].reshape(
|
||||
|
||||
Reference in New Issue
Block a user