[Bugfix] Merge MM embeddings by index instead of token IDs (#16229)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Signed-off-by: NickLucche <nlucches@redhat.com>
Signed-off-by: Roger Wang <hey@rogerw.io>
Co-authored-by: NickLucche <nlucches@redhat.com>
Co-authored-by: Roger Wang <hey@rogerw.io>
This commit is contained in:
Cyrus Leung
2025-09-27 16:15:12 +08:00
committed by GitHub
parent 176173989a
commit 27d7638b94
80 changed files with 966 additions and 1139 deletions

View File

@@ -41,6 +41,13 @@ class VllmModel(Protocol[T_co]):
) -> None:
...
def get_input_embeddings(
self,
input_ids: torch.Tensor,
) -> torch.Tensor:
"""Apply token embeddings to `input_ids`."""
...
def forward(
self,
input_ids: torch.Tensor,
@@ -54,6 +61,19 @@ def _check_vllm_model_init(model: Union[type[object], object]) -> bool:
return supports_kw(model_init, "vllm_config")
def _check_vllm_model_get_input_embeddings(
model: Union[type[object], object]) -> bool:
model_get_input_embeddings = getattr(model, "get_input_embeddings", None)
if not callable(model_get_input_embeddings):
logger.warning(
"The model (%s) is missing the `get_input_embeddings` method.",
model,
)
return False
return True
def _check_vllm_model_forward(model: Union[type[object], object]) -> bool:
model_forward = getattr(model, "forward", None)
if not callable(model_forward):
@@ -88,7 +108,9 @@ def is_vllm_model(model: object) -> TypeIs[VllmModel]:
def is_vllm_model(
model: Union[type[object], object],
) -> Union[TypeIs[type[VllmModel]], TypeIs[VllmModel]]:
return _check_vllm_model_init(model) and _check_vllm_model_forward(model)
return (_check_vllm_model_init(model)
and _check_vllm_model_get_input_embeddings(model)
and _check_vllm_model_forward(model))
@runtime_checkable