[V1] Remove legacy input registry (#15673)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-03-28 14:34:34 +08:00
committed by GitHub
parent 8693e47e6a
commit 355f66348c
17 changed files with 132 additions and 153 deletions

View File

@@ -15,7 +15,6 @@ from vllm.attention.layer import Attention
from vllm.config import CompilationLevel, VllmConfig
from vllm.distributed.parallel_state import get_pp_group, graph_capture
from vllm.forward_context import set_forward_context
from vllm.inputs import INPUT_REGISTRY
from vllm.logger import init_logger
from vllm.model_executor.layers.fused_moe import FusedMoE
from vllm.model_executor.layers.rotary_embedding import MRotaryEmbedding
@@ -130,7 +129,6 @@ class GPUModelRunner(LoRAModelRunnerMixin):
self.cascade_attn_enabled = not self.model_config.disable_cascade_attn
# Multi-modal data support
self.input_registry = INPUT_REGISTRY
self.mm_registry = MULTIMODAL_REGISTRY
self.uses_mrope = model_config.uses_mrope
@@ -1473,16 +1471,11 @@ class GPUModelRunner(LoRAModelRunnerMixin):
encoder_budget, max_num_mm_items, dummy_data_modality)
# Create dummy batch of multimodal inputs.
dummy_request_data = self.input_registry.dummy_data_for_profiling(
dummy_request_data = self.mm_registry.get_decoder_dummy_data(
model_config=self.model_config,
seq_len=self.max_num_tokens,
mm_registry=self.mm_registry,
)
dummy_mm_data = dummy_request_data.multi_modal_data
if not isinstance(dummy_mm_data, MultiModalKwargs):
# TODO: Delete this check once input mapper is fully removed.
raise RuntimeError(
"Legacy input mapper is not supported in V1")
# Dummy data definition may contain multiple multimodal items
# (e.g., multiple images) for a single request, therefore here we

View File

@@ -17,7 +17,6 @@ from vllm.attention.backends.abstract import AttentionType
from vllm.attention.layer import Attention
from vllm.config import VllmConfig
from vllm.forward_context import set_forward_context
from vllm.inputs import INPUT_REGISTRY
from vllm.logger import init_logger
from vllm.model_executor.model_loader import get_model
from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalKwargs
@@ -102,7 +101,6 @@ class TPUModelRunner:
self.hidden_size = model_config.get_hidden_size()
# Multi-modal data support
self.input_registry = INPUT_REGISTRY
self.mm_registry = MULTIMODAL_REGISTRY
self.uses_mrope = model_config.uses_mrope
# TODO: Support M-RoPE (e.g., Qwen2-VL)