[Bugfix] Fix embedding assignment for InternVL-based models (#15086)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -36,11 +36,11 @@ class NVLMProcessor(BaseInternVLProcessor):
|
||||
def image_token_id(self) -> int:
|
||||
return self.tokenizer.get_vocab()[IMG_PAD]
|
||||
|
||||
def get_image_repl_features(
|
||||
def get_image_repl(
|
||||
self,
|
||||
feature_size: int,
|
||||
num_patches: Optional[int],
|
||||
) -> str:
|
||||
) -> PromptUpdateDetails[str]:
|
||||
if num_patches is None:
|
||||
raise NotImplementedError("Embedding inputs are not supported")
|
||||
|
||||
@@ -55,14 +55,9 @@ class NVLMProcessor(BaseInternVLProcessor):
|
||||
# We include the start and end as well because "<Image><tile" is
|
||||
# tokenized as ["<Image", "><", "tile"], resulting in an assertion error
|
||||
# when trying to find "<tile" as a subsequence of "<Image><tile"
|
||||
return "<Image>" + features + "</Image>"
|
||||
repl = "<Image>" + features + "</Image>"
|
||||
|
||||
def get_image_repl_full(
|
||||
self,
|
||||
feature_size: int,
|
||||
num_patches: Optional[int],
|
||||
) -> str:
|
||||
return self.get_image_repl_features(feature_size, num_patches)
|
||||
return PromptUpdateDetails(full=repl, features=repl)
|
||||
|
||||
|
||||
class NVLMProcessingInfo(BaseInternVLProcessingInfo):
|
||||
@@ -180,11 +175,11 @@ class NVLMMultiModalProcessor(InternVLMultiModalProcessor[NVLMProcessingInfo]):
|
||||
if num_patches is not None:
|
||||
assert isinstance(num_patches, int)
|
||||
|
||||
repl = hf_processor.get_image_repl(feature_size, num_patches)
|
||||
|
||||
return PromptUpdateDetails(
|
||||
full=hf_processor.get_image_repl_full(feature_size,
|
||||
num_patches) + "\n",
|
||||
features=hf_processor.get_image_repl_features(
|
||||
feature_size, num_patches) + "\n",
|
||||
full=repl.full + "\n",
|
||||
features=repl.features + "\n",
|
||||
)
|
||||
|
||||
# See note in dummy data regarding why we have the extra newline
|
||||
|
||||
Reference in New Issue
Block a user