[Core] Remove legacy input mapper/processor from V0 (#15686)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
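
This change removes the V0 legacy input mapper/processor: every multimodal model is now assumed to have a merged multi-modal processor registered, so the interim `_can_process_multimodal()` gate and the legacy fallback fields threaded through `token_inputs()` are deleted at every call site. Below is a minimal, runnable sketch of that gating change; it is not the vLLM source, and `token_inputs`/`process_multimodal`/`has_processor` here are simplified stand-ins for the real helpers touched in the diff.

    from typing import Any, Optional

    def token_inputs(**kwargs: Any) -> dict[str, Any]:
        # Stand-in for vllm.inputs.token_inputs().
        return {"type": "token", **kwargs}

    def process_multimodal(prompt_token_ids: list[int],
                           mm_data: dict[str, Any]) -> dict[str, Any]:
        # Stand-in for InputPreprocessor._process_multimodal().
        return {"type": "multimodal", "prompt_token_ids": prompt_token_ids}

    def make_inputs_before(prompt_token_ids: list[int],
                           mm_data: Optional[dict[str, Any]],
                           has_processor: bool) -> dict[str, Any]:
        # V0: only models with a registered multi-modal processor took the
        # new path; everything else fell back to the legacy input mapper,
        # threading multi_modal_data through token_inputs().
        if mm_data is not None and has_processor:
            return process_multimodal(prompt_token_ids, mm_data)
        return token_inputs(prompt_token_ids=prompt_token_ids,
                            multi_modal_data=mm_data)

    def make_inputs_after(prompt_token_ids: list[int],
                          mm_data: Optional[dict[str, Any]]) -> dict[str, Any]:
        # After this commit: a processor is assumed for every multimodal
        # model, so both the registry gate and the legacy fields disappear.
        if mm_data is not None:
            return process_multimodal(prompt_token_ids, mm_data)
        return token_inputs(prompt_token_ids=prompt_token_ids)

    assert make_inputs_after([1, 2, 3], None)["type"] == "token"
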
@@ -223,28 +223,6 @@ class InputPreprocessor:
             lora_request=lora_request,
             add_special_tokens=add_special_tokens)
 
-    def _can_process_multimodal(self) -> bool:
-        model_config = self.model_config
-
-        if not model_config.is_multimodal_model:
-            raise ValueError("Your model does not support multi-modal inputs")
-
-        # Interim measure so we can handle models that have yet to be
-        # updated to use the new multi-modal processor
-        can_process_multimodal = self.mm_registry.has_processor(model_config)
-        if not can_process_multimodal:
-            from vllm.model_executor.models.registry import _VLLM_MODELS
-            if not any(arch in _VLLM_MODELS
-                       for arch in model_config.architectures):
-                logger.warning_once(
-                    "Your model uses the legacy input pipeline, which will be "
-                    "removed in an upcoming release. "
-                    "Please upgrade to the new multi-modal processing pipeline "
-                    "(https://docs.vllm.ai/en/latest/design/mm_processing.html)"
-                )
-
-        return can_process_multimodal
-
     def _process_multimodal(
         self,
         prompt: Union[str, list[int]],
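
For reference, the guard deleted above distills into the standalone form below. This is a hedged sketch, not the vLLM internals: `_VLLM_MODELS` is an illustrative stand-in for the in-tree architecture registry, `has_processor` stands in for `self.mm_registry.has_processor(model_config)`, and plain `logger.warning` replaces vLLM's `logger.warning_once`.

    import logging

    logger = logging.getLogger(__name__)

    # Illustrative stand-in for the in-tree architecture set; real contents
    # differ (vllm.model_executor.models.registry._VLLM_MODELS).
    _VLLM_MODELS = {"LlavaForConditionalGeneration"}

    def can_process_multimodal(architectures: list[str],
                               has_processor: bool) -> bool:
        # Warn only for out-of-tree architectures still on the legacy path;
        # in-tree models were expected to migrate before this removal.
        if not has_processor and not any(arch in _VLLM_MODELS
                                         for arch in architectures):
            logger.warning(
                "Your model uses the legacy input pipeline, which will be "
                "removed in an upcoming release.")
        return has_processor

As the removed code shows, only models whose architectures were absent from the in-tree registry received the deprecation warning; legacy in-tree models were tracked for migration separately.
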
@@ -258,8 +236,7 @@ class InputPreprocessor:
         returning the corresponding token IDs and metadata.
         """
-        # At the moment on model (PrithviGeoSpatialMAE) requires to be
-        # initialized without a tokenizer while using also multi-modal
-        # input.
+        # At the moment on model (PrithviGeoSpatialMAE) requires to be
+        # initialized without a tokenizer while using also multi-modal input
         if not self.tokenizer:
             tokenizer = object()  # Dummy
         else:
@@ -285,8 +262,7 @@ class InputPreprocessor:
     ) -> MultiModalInputs:
         """Async version of :meth:`_process_multimodal`."""
-        # At the moment on model (PrithviGeoSpatialMAE) requires to be
-        # initialized without a tokenizer while using also multi-modal
-        # input.
+        # At the moment on model (PrithviGeoSpatialMAE) requires to be
+        # initialized without a tokenizer while using also multi-modal input
         if not self.tokenizer:
             tokenizer = object()  # Dummy
         else:
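
Both hunks keep the `tokenizer = object()  # Dummy` placeholder for models such as PrithviGeoSpatialMAE that are initialized without a tokenizer: downstream processing requires some tokenizer-shaped argument but never calls it for these models. As a design note, a stricter placeholder (an illustrative variant, not what vLLM does) would fail loudly if the dummy were ever actually used:

    class _FailLoudTokenizer:
        """Placeholder that raises on any use, instead of a bare object()."""

        def __getattr__(self, name: str):
            raise RuntimeError(
                f"tokenizer.{name} was accessed, but this model was "
                "initialized without a tokenizer")

    tokenizer = _FailLoudTokenizer()  # drop-in for the object() dummy above
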
@@ -343,7 +319,7 @@ class InputPreprocessor:
             multi_modal_data = tokens_content.get("multi_modal_data")
             mm_processor_kwargs = tokens_content.get("mm_processor_kwargs")
 
-            if multi_modal_data is not None and self._can_process_multimodal():
+            if multi_modal_data is not None:
                 return self._process_multimodal(
                     prompt_token_ids,
                     multi_modal_data,
@@ -355,8 +331,6 @@ class InputPreprocessor:
             return token_inputs(
                 prompt_token_ids=prompt_token_ids,
                 token_type_ids=token_type_ids,
-                multi_modal_data=multi_modal_data,
-                mm_processor_kwargs=mm_processor_kwargs,
             )
 
         if parsed["type"] == "text":
@@ -366,7 +340,7 @@ class InputPreprocessor:
             multi_modal_data = text_content.get("multi_modal_data")
             mm_processor_kwargs = text_content.get("mm_processor_kwargs")
 
-            if multi_modal_data is not None and self._can_process_multimodal():
+            if multi_modal_data is not None:
                 return self._process_multimodal(
                     prompt_text,
                     multi_modal_data,
@@ -383,8 +357,6 @@ class InputPreprocessor:
             return token_inputs(
                 prompt=prompt_text,
                 prompt_token_ids=prompt_token_ids,
-                multi_modal_data=multi_modal_data,
-                mm_processor_kwargs=mm_processor_kwargs,
             )
 
         assert_never(parsed)
@@ -417,7 +389,7 @@ class InputPreprocessor:
             multi_modal_data = tokens_content.get("multi_modal_data")
             mm_processor_kwargs = tokens_content.get("mm_processor_kwargs")
 
-            if multi_modal_data is not None and self._can_process_multimodal():
+            if multi_modal_data is not None:
                 return await self._process_multimodal_async(
                     prompt_token_ids,
                     multi_modal_data,
@@ -426,11 +398,7 @@ class InputPreprocessor:
                     return_mm_hashes=return_mm_hashes,
                 )
 
-            return token_inputs(
-                prompt_token_ids=prompt_token_ids,
-                multi_modal_data=multi_modal_data,
-                mm_processor_kwargs=mm_processor_kwargs,
-            )
+            return token_inputs(prompt_token_ids=prompt_token_ids)
 
         if parsed["type"] == "text":
             text_content = parsed["content"]
@@ -439,7 +407,7 @@ class InputPreprocessor:
             multi_modal_data = text_content.get("multi_modal_data")
             mm_processor_kwargs = text_content.get("mm_processor_kwargs")
 
-            if multi_modal_data is not None and self._can_process_multimodal():
+            if multi_modal_data is not None:
                 return await self._process_multimodal_async(
                     prompt_text,
                     multi_modal_data,
@@ -456,8 +424,6 @@ class InputPreprocessor:
             return token_inputs(
                 prompt=prompt_text,
                 prompt_token_ids=prompt_token_ids,
-                multi_modal_data=multi_modal_data,
-                mm_processor_kwargs=mm_processor_kwargs,
             )
 
         assert_never(parsed)
@@ -594,15 +560,13 @@ class InputPreprocessor:
                 decoder_inputs = self._prompt_to_llm_inputs(decoder_input)
                 # For multimodal model, override decoder prompt from processor
                 # with explicit decoder prompt.
-                if self.model_config.is_multimodal_model and (
-                        self._can_process_multimodal()):
+                if self.model_config.is_multimodal_model:
                     encoder_inputs, decoder_inputs = (
                         self._separate_enc_dec_inputs_from_mm_processor_outputs(
                             encoder_inputs, decoder_inputs))
         else:
             inputs = self._prompt_to_llm_inputs(prompt)
-            if self.model_config.is_multimodal_model and (
-                    self._can_process_multimodal()):
+            if self.model_config.is_multimodal_model:
                 # Encoder-Decoder Multimodal model
                 encoder_inputs, decoder_inputs = (
                     self._separate_enc_dec_inputs_from_mm_processor_outputs(
@@ -637,15 +601,13 @@ class InputPreprocessor:
 
                 # For multimodal model, override decoder prompt from processor
                 # with explicit decoder prompt.
-                if self.model_config.is_multimodal_model and (
-                        self._can_process_multimodal()):
+                if self.model_config.is_multimodal_model:
                     encoder_inputs, decoder_inputs = (
                         self._separate_enc_dec_inputs_from_mm_processor_outputs(
                             encoder_inputs, decoder_inputs))
         else:
             inputs = await self._prompt_to_llm_inputs_async(prompt)
-            if self.model_config.is_multimodal_model and (
-                    self._can_process_multimodal()):
+            if self.model_config.is_multimodal_model:
                 # Encoder-Decoder Multimodal model
                 encoder_inputs, decoder_inputs = (
                     self._separate_enc_dec_inputs_from_mm_processor_outputs(
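
The last two hunks apply the same simplification to the sync and async encoder-decoder paths: with the gate gone, the multimodal branch unconditionally splits the processor output into encoder and decoder inputs. Below is a minimal, runnable sketch of that branch, with `separate_enc_dec` standing in for `_separate_enc_dec_inputs_from_mm_processor_outputs()` and simplified types throughout; it illustrates the shape of the logic, not the vLLM implementation.

    from typing import Any, Optional

    Inputs = dict[str, Any]

    def separate_enc_dec(encoder_inputs: Inputs,
                         decoder_inputs: Optional[Inputs]
                         ) -> tuple[Inputs, Inputs]:
        # Stand-in: the multi-modal processor emits one combined result,
        # which is split here into the encoder and decoder halves.
        return encoder_inputs, decoder_inputs or {"prompt_token_ids": []}

    def to_enc_dec_inputs(inputs: Inputs,
                          is_multimodal_model: bool) -> tuple[Inputs, Inputs]:
        if is_multimodal_model:
            # Encoder-decoder multimodal model: always split the processor
            # output; there is no longer a registry check in between.
            return separate_enc_dec(inputs, None)
        # Text-only encoder-decoder model: inputs feed the encoder directly.
        return inputs, {"prompt_token_ids": []}
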