diff --git a/docs/design/custom_op.md b/docs/design/custom_op.md
index 034736ec6..a62d03307 100644
--- a/docs/design/custom_op.md
+++ b/docs/design/custom_op.md
@@ -54,6 +54,8 @@ For example:
 --8<-- "vllm/model_executor/layers/attention/mm_encoder_attention.py:mm_encoder_attn"
 
 --8<-- "vllm/model_executor/layers/mla.py:multi_head_latent_attention"
+
+--8<-- "vllm/model_executor/models/deepencoder.py:rel_pos_attention"
 ```
 
 **2. Activation:**
diff --git a/vllm/model_executor/models/deepencoder.py b/vllm/model_executor/models/deepencoder.py
index f7ae4264f..68c101460 100644
--- a/vllm/model_executor/models/deepencoder.py
+++ b/vllm/model_executor/models/deepencoder.py
@@ -18,6 +18,7 @@ import torch.nn as nn
 import torch.nn.functional as F
 from transformers import CLIPVisionConfig
 
+from vllm.model_executor.custom_op import PluggableLayer
 from vllm.model_executor.layers.attention import MMEncoderAttention
 from vllm.model_executor.layers.conv import Conv2dLayer
 from vllm.model_executor.layers.quantization import QuantizationConfig
@@ -263,9 +264,13 @@ class Block(nn.Module):
         return x
 
 
-class RelPosAttention(nn.Module):
+# --8<-- [start:rel_pos_attention]
+@PluggableLayer.register("rel_pos_attention")
+class RelPosAttention(PluggableLayer):
     """Multi-head Attention block with relative position embeddings."""
 
+    # --8<-- [end:rel_pos_attention]
+
     def __init__(
         self,
         dim: int,
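
The `# --8<-- [start:rel_pos_attention]` / `[end:rel_pos_attention]` comments are snippet markers: the `--8<--` include added to `docs/design/custom_op.md` pulls in only the region between them, so the docs page shows the decorated class signature without the full implementation. For reference, here is a minimal sketch (not part of this change) of the same registration pattern applied to another layer; the `ToyScaledAdd` class, the `"toy_scaled_add"` op name, and the `forward_native` hook (mirroring vLLM's CustomOp-style backend dispatch) are illustrative assumptions, not code from the tree.

```python
# Minimal sketch, NOT part of this PR: the registration pattern shown in the
# diff above, applied to a hypothetical layer. PluggableLayer is the import
# added by this change; the op name, class, and forward_native hook are
# assumptions for illustration only.
import torch
import torch.nn as nn

from vllm.model_executor.custom_op import PluggableLayer


@PluggableLayer.register("toy_scaled_add")
class ToyScaledAdd(PluggableLayer):
    """Adds a learnable per-channel scale to a residual: y = x + scale * r."""

    def __init__(self, dim: int) -> None:
        super().__init__()
        self.scale = nn.Parameter(torch.ones(dim))

    def forward_native(self, x: torch.Tensor, residual: torch.Tensor) -> torch.Tensor:
        # Pure-PyTorch reference path; a backend-specific override (e.g. a
        # fused CUDA kernel) could be selected by the registry at runtime.
        return x + self.scale * residual
```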