Allow Gemma3 to take image embeddings (#28483)

Signed-off-by: tingtinggithub <streamttt@gmail.com>
This commit is contained in:
tingtinggithub
2025-11-15 04:18:08 -08:00
committed by GitHub
parent f36292dbee
commit cb15ee28db
4 changed files with 69 additions and 29 deletions

View File

@@ -14,6 +14,7 @@ from vllm.lora.request import LoRARequest
from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalRegistry
from vllm.multimodal.cache import processor_cache_from_config
from vllm.multimodal.inputs import MultiModalFeatureSpec, MultiModalUUIDDict
from vllm.multimodal.parse import MultiModalDataParser
from vllm.multimodal.processing import EncDecMultiModalProcessor
from vllm.multimodal.utils import argsort_mm_positions
from vllm.pooling_params import PoolingParams
@@ -340,7 +341,12 @@ class Processor:
mm_uuids: dict[str, list[str | None] | str] = {}
for modality, data in mm_data.items():
n = len(data) if isinstance(data, list) else 1
# Like lists, embedding inputs hold multiple items; generate one UUID per item.
n = (
len(data)
if isinstance(data, list) or MultiModalDataParser.is_embeddings(data)
else 1
)
mm_uuids[modality] = [f"{request_id}-{modality}-{i}" for i in range(n)]
return mm_uuids