[Model][VLM] Initialize support for Mono-InternVL model (#9528)

This commit is contained in:
Isotr0py
2024-10-23 00:01:46 +08:00
committed by GitHub
parent 9dbcce84a7
commit bb392ea2d2
6 changed files with 253 additions and 27 deletions

View File

@@ -97,6 +97,37 @@ class InternVisionEmbeddings(nn.Module):
return embeddings
class InternVisionPatchModel(nn.Module):
    """Vision front-end that only applies the patch embedding layer.

    The module wraps a single ``InternVisionEmbeddings`` instance; its
    ``forward`` either passes precomputed embeddings straight through or
    runs raw pixel values through that embedding layer.
    """

    def __init__(self, config: PretrainedConfig):
        """Store ``config`` and build the embedding layer from it."""
        super().__init__()
        self.config = config
        self.embeddings = InternVisionEmbeddings(config)

    def get_input_embeddings(self):
        """Return the embedding submodule held by this model."""
        return self.embeddings

    def forward(
        self,
        pixel_values: Optional[torch.Tensor] = None,
        pixel_embeds: Optional[torch.Tensor] = None,
    ) -> torch.FloatTensor:
        """Produce the embedded representation of the visual input.

        Args:
            pixel_values: Raw image tensor; must have exactly 4 dimensions
                when it is the input actually used.
            pixel_embeds: Precomputed embeddings. When given, they take
                precedence over ``pixel_values`` and are returned as-is.

        Returns:
            ``pixel_embeds`` unchanged if provided, otherwise the result of
            applying ``self.embeddings`` to ``pixel_values``.

        Raises:
            ValueError: If neither argument is supplied, or if
                ``pixel_values`` is used and is not 4-dimensional.
        """
        # Precomputed embeddings win over raw pixels and skip all checks.
        if pixel_embeds is not None:
            return pixel_embeds

        if pixel_values is None:
            raise ValueError(
                'You have to specify pixel_values or pixel_embeds')

        if pixel_values.ndim != 4:
            raise ValueError(
                f'wrong pixel_values size: {pixel_values.shape}')

        return self.embeddings(pixel_values)
class InternParallelAttention(nn.Module):
"""Multi-headed attention from 'Attention Is All You Need' paper"""