Remove V0 Encoder-Decoder Support (#24907)
Signed-off-by: Woosuk Kwon <woosuk@thinkingmachines.ai>
@@ -167,8 +167,6 @@ def _test_processing_correctness(
    # incorrect token ids. So we need to use `add_special_tokens=False` here
    # to leave bos_token to be added by the processor.
    _ADD_SPECIAL_TOKENS_OVERRIDES = {
        "donut": False,
        "mllama": False,
        "ovis": False,
        "ovis2_5": False,
        "paligemma": False,
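For context, a minimal sketch of how a per-model override table like this is typically consumed when tokenizing the text prompt; the helper below is an illustrative assumption, not the test's actual code:

    # Hypothetical helper: default to add_special_tokens=True, and let models
    # whose processor inserts bos_token itself opt out via the override table.
    def _encode_prompt(tokenizer, model_type: str, prompt: str) -> list[int]:
        add_special_tokens = _ADD_SPECIAL_TOKENS_OVERRIDES.get(model_type, True)
        return tokenizer.encode(prompt, add_special_tokens=add_special_tokens)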
@@ -278,9 +276,7 @@ def _test_processing_correctness_one(
    "facebook/chameleon-7b",
    "CohereLabs/command-a-vision-07-2025",
    "deepseek-ai/deepseek-vl2-tiny",
    "naver-clova-ix/donut-base-finetuned-docvqa",
    "baidu/ERNIE-4.5-VL-28B-A3B-PT",
    "microsoft/Florence-2-base",
    "adept/fuyu-8b",
    "google/gemma-3-4b-it",
    "google/gemma-3n-E2B-it",
@@ -305,7 +301,6 @@ def _test_processing_correctness_one(
    "llava-hf/llava-v1.6-mistral-7b-hf",
    "llava-hf/LLaVA-NeXT-Video-7B-hf",
    "llava-hf/llava-onevision-qwen2-0.5b-ov-hf",
    "meta-llama/Llama-3.2-11B-Vision-Instruct",
    "TIGER-Lab/Mantis-8B-siglip-llama3",
    "mispeech/midashenglm-7b",
    "openbmb/MiniCPM-Llama3-V-2_5",
@@ -1,72 +0,0 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Tests for mllama's multimodal preprocessing and profiling."""
import pytest
from transformers import MllamaConfig

from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.profiling import MultiModalProfiler

from ...utils import build_model_context


@pytest.mark.parametrize("model_id",
                         ["meta-llama/Llama-3.2-11B-Vision-Instruct"])
@pytest.mark.parametrize("max_model_len", [4096, 8192, 25600, 131072])
@pytest.mark.parametrize("max_num_seqs", [1, 2, 8])
def test_profiling(
    model_id: str,
    max_model_len: int,
    max_num_seqs: int,
):
    # regression test for https://github.com/vllm-project/vllm/issues/13929
    from vllm.model_executor.models.mllama import calc_token_per_chunk

    model_config_kwargs = {
        "max_model_len": max_model_len,
    }
    ctx = build_model_context(
        model_id,
        model_config_kwargs=model_config_kwargs,
        limit_mm_per_prompt={"image": 1},
    )

    mm_config = ctx.get_mm_config()
    processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
    profiler = MultiModalProfiler(processor)

    dummy_encoder_data = profiler.get_encoder_dummy_data(
        max_model_len,
        mm_counts=mm_config.limit_per_prompt,
    )
    dummy_mm_data = processor.dummy_inputs.get_dummy_processor_inputs(
        max_model_len,
        mm_counts=mm_config.limit_per_prompt,
    )

    hf_config = ctx.get_hf_config(MllamaConfig)
    image_size = hf_config.vision_config.image_size
    encoder_seq_lens = [len(dummy_encoder_data.prompt_token_ids)
                        ] * max_num_seqs

    mm_data = processor.apply(
        prompt=dummy_mm_data.prompt,
        mm_data=dummy_mm_data.mm_data,
        hf_processor_mm_kwargs=dict(),
    )["mm_kwargs"].get_data()

    # Get the actual number of encoder tokens for each sample.
    # Because attn_metadata.encoder_seq_lens only counts the last
    # group of images for each sample, which is used to cheat the
    # block manager to allocate blocks for those images only.
    # See MllamaMultiModalProcessor for more details.
    num_tiles = [[t] for t in mm_data.pop("num_tiles")]
    num_tokens_per_tile = calc_token_per_chunk(image_size)
    actual_encoder_seq_lens = [
        sum(num_tile) * num_tokens_per_tile for num_tile in num_tiles
    ]

    # simulate mllama image-present prefill.
    for actual_len, last_group_len in zip(actual_encoder_seq_lens,
                                          encoder_seq_lens):
        assert actual_len >= last_group_len
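The final assertion in the deleted test reduces to simple tile arithmetic. A minimal worked example with illustrative numbers only (the real values come from calc_token_per_chunk(image_size) and the processor's num_tiles output; 1601 tokens per tile is an assumption for illustration):

    # Assume the processor split the single dummy image into 4 tiles and
    # each tile contributes 1601 encoder tokens (illustrative values).
    num_tiles = [[4]]
    num_tokens_per_tile = 1601
    actual_encoder_seq_lens = [sum(t) * num_tokens_per_tile for t in num_tiles]
    # -> [6404]; each entry must be >= the profiled last-group length.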
@@ -31,7 +31,6 @@ from ...utils import dummy_hf_overrides

ARCH_TO_SKIP = {
    "MolmoForCausalLM": "incompatible requirements",
    "Florence2ForConditionalGeneration": "not supported in V1",
}
ARCH_NEEDS_EXTRAS = [
    "InternVLChatModel",
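A hedged sketch of how a skip table like ARCH_TO_SKIP is commonly consumed inside a parametrized test body; the model_arch variable and placement are assumptions, not this file's actual code:

    # Hypothetical usage: skip architectures this suite cannot exercise.
    if model_arch in ARCH_TO_SKIP:
        pytest.skip(ARCH_TO_SKIP[model_arch])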