feat(models): implement Google Gemma 4 architecture support (MoE, Multimodal, Reasoning, Tool-Use) (#38826)
Signed-off-by: Luciano Martins <lucianommartins@users.noreply.github.com> Signed-off-by: Luciano Martins <lucianomartins@google.com> Co-authored-by: Luciano Martins <lucianommartins@users.noreply.github.com> Co-authored-by: Isotr0py <2037008807@qq.com>
This commit is contained in:
@@ -394,6 +394,22 @@ VLM_TEST_SETTINGS = {
|
||||
vllm_runner_kwargs={"mm_processor_kwargs": {"do_pan_and_scan": True}},
|
||||
patch_hf_runner=model_utils.gemma3_patch_hf_runner,
|
||||
),
|
||||
"gemma4": VLMTestInfo(
|
||||
models=["google/gemma-4-E2B-it"],
|
||||
test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
|
||||
prompt_formatter=lambda img_prompt: f"<bos><start_of_turn>user\n{img_prompt}<end_of_turn>\n<start_of_turn>model\n", # noqa: E501
|
||||
single_image_prompts=IMAGE_ASSETS.prompts(
|
||||
{
|
||||
"stop_sign": "What's the content in the center of the image?",
|
||||
"cherry_blossom": "What is the season?",
|
||||
}
|
||||
),
|
||||
multi_image_prompt="Describe the two images in detail.",
|
||||
max_model_len=4096,
|
||||
max_num_seqs=2,
|
||||
auto_cls=AutoModelForImageTextToText,
|
||||
vllm_runner_kwargs={"limit_mm_per_prompt": {"image": 4}},
|
||||
),
|
||||
"granite_vision": VLMTestInfo(
|
||||
models=["ibm-granite/granite-vision-3.3-2b"],
|
||||
test_type=(VLMTestType.IMAGE),
|
||||
|
||||
44
tests/models/multimodal/processing/test_gemma4.py
Normal file
44
tests/models/multimodal/processing/test_gemma4.py
Normal file
@@ -0,0 +1,44 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import pytest
|
||||
|
||||
from vllm.multimodal import MULTIMODAL_REGISTRY
|
||||
|
||||
from ....conftest import ImageTestAssets
|
||||
from ...utils import build_model_context
|
||||
|
||||
# TODO: placeholder id; replace with the final published model id (expected "google/gemma-4-e2b-it") once the models are available
|
||||
GEMMA4_MODEL_ID = "google/gemma-4-E2B-it"
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model_id", [GEMMA4_MODEL_ID])
def test_limit_mm_per_prompt(
    image_assets: ImageTestAssets,
    model_id: str,
):
    """Check that going over ``limit_mm_per_prompt`` is rejected.

    The model context is built with a cap of one image per prompt; handing
    the processor a prompt that carries two images must raise ``ValueError``.
    """
    # Cap the context at a single image per prompt.
    image_limit = {"image": 1}
    ctx = build_model_context(
        model_id,
        mm_processor_kwargs={},
        limit_mm_per_prompt=image_limit,
    )
    processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)

    # Use the first two fixture images; if the fixture only offers one,
    # duplicate it so the request still carries two images.
    pil_images = [asset.pil_image for asset in image_assets]
    pil_images = pil_images[:2]
    if len(pil_images) < 2:
        pil_images = [pil_images[0]] * 2
    mm_data = {"image": pil_images}

    # Two image placeholders, matching the two images supplied above.
    two_image_prompt = "<image><image>"

    # Parsing stays inside the ``raises`` block on purpose: the limit check
    # may fire either while parsing the items or while processing them.
    with pytest.raises(ValueError, match="At most 1 image"):
        processor(
            two_image_prompt,
            mm_items=processor.info.parse_mm_data(mm_data),
            hf_processor_mm_kwargs={},
        )
|
||||
@@ -277,6 +277,10 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
|
||||
"google/gemma-2-9b", extras={"tiny": "google/gemma-2-2b-it"}
|
||||
),
|
||||
"Gemma3ForCausalLM": _HfExamplesInfo("google/gemma-3-1b-it"),
|
||||
"Gemma4ForCausalLM": _HfExamplesInfo(
|
||||
"google/gemma-4-E2B-it",
|
||||
# Same checkpoint as the Gemma4ForConditionalGeneration entry, so it needs the same minimum transformers release.
min_transformers_version="5.5.0",
|
||||
),
|
||||
"Gemma3nForCausalLM": _HfExamplesInfo("google/gemma-3n-E2B-it"),
|
||||
"GlmForCausalLM": _HfExamplesInfo("zai-org/glm-4-9b-chat-hf"),
|
||||
"Glm4ForCausalLM": _HfExamplesInfo("zai-org/GLM-4-9B-0414"),
|
||||
@@ -813,6 +817,10 @@ _MULTIMODAL_EXAMPLE_MODELS = {
|
||||
),
|
||||
"FuyuForCausalLM": _HfExamplesInfo("adept/fuyu-8b"),
|
||||
"Gemma3ForConditionalGeneration": _HfExamplesInfo("google/gemma-3-4b-it"),
|
||||
"Gemma4ForConditionalGeneration": _HfExamplesInfo(
|
||||
"google/gemma-4-E2B-it",
|
||||
min_transformers_version="5.5.0",
|
||||
),
|
||||
"Gemma3nForConditionalGeneration": _HfExamplesInfo("google/gemma-3n-E2B-it"),
|
||||
"GlmAsrForConditionalGeneration": _HfExamplesInfo(
|
||||
"zai-org/GLM-ASR-Nano-2512",
|
||||
|
||||
Reference in New Issue
Block a user