[Bugfix] Fix gemma3 with transformers backend (#23178)
Signed-off-by: raushan <raushan@huggingface.co> Signed-off-by: Raushan Turganbay <raushan@huggingface.co> Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
c02058c222
commit
7cd95dc8a3
@@ -193,6 +193,20 @@ VLM_TEST_SETTINGS = {
|
||||
# when processing the 3rd prompt in vLLM
|
||||
marks=[pytest.mark.core_model, pytest.mark.skip(reason="Test hangs")],
|
||||
),
|
||||
# Gemma3 has bidirectional mask on images
|
||||
"gemma3-transformers": VLMTestInfo(
|
||||
models=["google/gemma-3-4b-it"],
|
||||
test_type=VLMTestType.IMAGE,
|
||||
prompt_formatter=lambda vid_prompt: f"<'<bos><start_of_turn>user\n{vid_prompt}<start_of_image><end_of_turn>\n<start_of_turn>model\n", # noqa: E501
|
||||
max_model_len=4096,
|
||||
auto_cls=AutoModelForImageTextToText,
|
||||
vllm_output_post_proc=model_utils.gemma3_vllm_to_hf_output,
|
||||
image_size_factors=[(0.25, 0.5, 1.0)],
|
||||
vllm_runner_kwargs={
|
||||
"model_impl": "transformers",
|
||||
},
|
||||
marks=[pytest.mark.core_model],
|
||||
),
|
||||
"idefics3-transformers": VLMTestInfo(
|
||||
models=["HuggingFaceTB/SmolVLM-256M-Instruct"],
|
||||
test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
|
||||
|
||||
@@ -342,6 +342,29 @@ def gemma3_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
|
||||
return hf_model
|
||||
|
||||
|
||||
def gemma3_vllm_to_hf_output(vllm_output: RunnerOutput, model: str) -> RunnerOutput:
|
||||
"""Sanitize vllm output [gemma-3] to compare with hf output."""
|
||||
output_ids, output_str, out_logprobs = vllm_output
|
||||
|
||||
config = AutoConfig.from_pretrained(model)
|
||||
image_token_id = config.image_token_id
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained(model)
|
||||
eos_token_id = tokenizer.eos_token_id
|
||||
|
||||
hf_output_ids = [
|
||||
token_id
|
||||
for idx, token_id in enumerate(output_ids)
|
||||
if token_id != image_token_id
|
||||
]
|
||||
|
||||
hf_output_str = output_str
|
||||
if hf_output_ids[-1] == eos_token_id:
|
||||
hf_output_str = hf_output_str + tokenizer.decode(eos_token_id)
|
||||
|
||||
return hf_output_ids, hf_output_str, out_logprobs
|
||||
|
||||
|
||||
def glm4v_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
|
||||
"""Patches and returns an instance of the HfRunner to use for GLM4V."""
|
||||
hf_processor = hf_model.processor
|
||||
|
||||
Reference in New Issue
Block a user