[Bugfix][VLM] Fix incompatibility between #7902 and #7230 (#7948)

This commit is contained in:
Cyrus Leung
2024-08-28 23:11:18 +08:00
committed by GitHub
parent 98c12cffe5
commit ef9baee3c5
10 changed files with 120 additions and 92 deletions

View File

@@ -40,13 +40,13 @@ BLIP2_IMAGE_TOKEN_ID = 50265
 class Blip2ImagePixelInputs(TypedDict):
     type: Literal["pixel_values"]
     data: torch.Tensor
-    """Shape: (batch_size, num_channels, height, width)"""
+    """Shape: `(batch_size * num_images, num_channels, height, width)`"""
 
 
 class Blip2ImageEmbeddingInputs(TypedDict):
     type: Literal["image_embeds"]
     data: torch.Tensor
-    """Shape: `(batch_size, image_feature_size, hidden_size)`
+    """Shape: `(batch_size * num_images, image_feature_size, hidden_size)`
 
     `hidden_size` must match the hidden size of language model backbone.
     """