[Core] [Bugfix] [Multimodal] Fix multimodal profiling and generation for SFT/PTQed models (#20058)
Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
This commit is contained in:
@@ -265,7 +265,8 @@ class InputPreprocessor:
|
||||
prompt: Union[str, list[int]],
|
||||
mm_data: MultiModalDataDict,
|
||||
mm_processor_kwargs: Optional[Mapping[str, object]],
|
||||
lora_request: Optional[LoRARequest],
|
||||
tokenization_kwargs: Optional[dict[str, Any]] = None,
|
||||
lora_request: Optional[LoRARequest] = None,
|
||||
return_mm_hashes: bool = False,
|
||||
) -> MultiModalInputs:
|
||||
"""
|
||||
@@ -280,15 +281,19 @@ class InputPreprocessor:
|
||||
if mm_processor_kwargs is None:
|
||||
mm_processor_kwargs = {}
|
||||
|
||||
return mm_processor.apply(prompt, mm_data, mm_processor_kwargs,
|
||||
return_mm_hashes)
|
||||
return mm_processor.apply(prompt,
|
||||
mm_data,
|
||||
hf_processor_mm_kwargs=mm_processor_kwargs,
|
||||
tokenization_kwargs=tokenization_kwargs,
|
||||
return_mm_hashes=return_mm_hashes)
|
||||
|
||||
async def _process_multimodal_async(
|
||||
self,
|
||||
prompt: Union[str, list[int]],
|
||||
mm_data: MultiModalDataDict,
|
||||
mm_processor_kwargs: Optional[Mapping[str, object]],
|
||||
lora_request: Optional[LoRARequest],
|
||||
tokenization_kwargs: Optional[dict[str, Any]] = None,
|
||||
lora_request: Optional[LoRARequest] = None,
|
||||
return_mm_hashes: bool = False,
|
||||
) -> MultiModalInputs:
|
||||
"""
|
||||
@@ -302,8 +307,11 @@ class InputPreprocessor:
|
||||
if mm_processor_kwargs is None:
|
||||
mm_processor_kwargs = {}
|
||||
|
||||
return mm_processor.apply(prompt, mm_data, mm_processor_kwargs,
|
||||
return_mm_hashes)
|
||||
return mm_processor.apply(prompt,
|
||||
mm_data,
|
||||
hf_processor_mm_kwargs=mm_processor_kwargs,
|
||||
tokenization_kwargs=tokenization_kwargs,
|
||||
return_mm_hashes=return_mm_hashes)
|
||||
|
||||
def _process_embeds(
|
||||
self,
|
||||
@@ -338,6 +346,7 @@ class InputPreprocessor:
|
||||
def _process_tokens(
|
||||
self,
|
||||
parsed_content: TokensPrompt,
|
||||
tokenization_kwargs: Optional[dict[str, Any]] = None,
|
||||
lora_request: Optional[LoRARequest] = None,
|
||||
return_mm_hashes: bool = False,
|
||||
) -> Union[TokenInputs, MultiModalInputs]:
|
||||
@@ -350,6 +359,7 @@ class InputPreprocessor:
|
||||
prompt_token_ids,
|
||||
multi_modal_data,
|
||||
parsed_content.get("mm_processor_kwargs"),
|
||||
tokenization_kwargs=tokenization_kwargs,
|
||||
lora_request=lora_request,
|
||||
return_mm_hashes=return_mm_hashes,
|
||||
)
|
||||
@@ -367,6 +377,7 @@ class InputPreprocessor:
|
||||
async def _process_tokens_async(
|
||||
self,
|
||||
parsed_content: TokensPrompt,
|
||||
tokenization_kwargs: Optional[dict[str, Any]] = None,
|
||||
lora_request: Optional[LoRARequest] = None,
|
||||
return_mm_hashes: bool = False,
|
||||
) -> Union[TokenInputs, MultiModalInputs]:
|
||||
@@ -379,6 +390,7 @@ class InputPreprocessor:
|
||||
prompt_token_ids,
|
||||
multi_modal_data,
|
||||
parsed_content.get("mm_processor_kwargs"),
|
||||
tokenization_kwargs=tokenization_kwargs,
|
||||
lora_request=lora_request,
|
||||
return_mm_hashes=return_mm_hashes,
|
||||
)
|
||||
@@ -408,6 +420,7 @@ class InputPreprocessor:
|
||||
prompt_text,
|
||||
multi_modal_data,
|
||||
parsed_content.get("mm_processor_kwargs"),
|
||||
tokenization_kwargs=tokenization_kwargs,
|
||||
lora_request=lora_request,
|
||||
return_mm_hashes=return_mm_hashes,
|
||||
)
|
||||
@@ -442,6 +455,7 @@ class InputPreprocessor:
|
||||
prompt_text,
|
||||
multi_modal_data,
|
||||
parsed_content.get("mm_processor_kwargs"),
|
||||
tokenization_kwargs=tokenization_kwargs,
|
||||
lora_request=lora_request,
|
||||
return_mm_hashes=return_mm_hashes,
|
||||
)
|
||||
@@ -860,7 +874,8 @@ class InputPreprocessor:
|
||||
"returned until they are supported on vLLM V1.")
|
||||
# Encoder-decoder model requires special mapping of
|
||||
# input prompts to encoder & decoder
|
||||
return self._process_encoder_decoder_prompt(prompt)
|
||||
return self._process_encoder_decoder_prompt(
|
||||
prompt, tokenization_kwargs)
|
||||
|
||||
if is_explicit_encoder_decoder_prompt(prompt):
|
||||
raise ValueError("Cannot pass encoder-decoder prompt "
|
||||
|
||||
Reference in New Issue
Block a user