[Model] Support DeepSeek-OCR-2 (#33165)

Signed-off-by: liuli <ll407707@alibaba-inc.com>
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
Co-authored-by: liuli <ll407707@alibaba-inc.com>
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
RED
2026-02-02 14:24:10 +08:00
committed by GitHub
parent beb8899482
commit 808dd87b30
9 changed files with 1099 additions and 1 deletions

View File

@@ -79,6 +79,7 @@ class ImageEncoderViT(nn.Module):
rel_pos_zero_init: bool = True,
window_size: int = 0,
global_attn_indexes: tuple[int, ...] = (),
+ last_conv_output: int = 1024,
) -> None:
"""
Args:
@@ -155,7 +156,7 @@ class ImageEncoderViT(nn.Module):
256, 512, kernel_size=3, stride=2, padding=1, bias=False
)
self.net_3 = Conv2dLayer(
- 512, 1024, kernel_size=3, stride=2, padding=1, bias=False
+ 512, last_conv_output, kernel_size=3, stride=2, padding=1, bias=False
)
def get_abs_pos(self, abs_pos: torch.Tensor, tgt_size: int):