[1/N] Initial prototype for multi-modal processor (#10044)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -30,7 +30,7 @@ from vllm.executor.executor_base import ExecutorBase
|
||||
from vllm.executor.gpu_executor import GPUExecutor
|
||||
from vllm.executor.ray_utils import initialize_ray_cluster
|
||||
from vllm.inputs import (INPUT_REGISTRY, InputRegistry, ProcessorInputs,
|
||||
PromptType)
|
||||
PromptType, SingletonInputsAdapter)
|
||||
from vllm.inputs.parse import is_encoder_decoder_inputs, is_token_prompt
|
||||
from vllm.inputs.preprocess import InputPreprocessor
|
||||
from vllm.logger import init_logger
|
||||
@@ -39,6 +39,7 @@ from vllm.lora.request import LoRARequest
|
||||
from vllm.model_executor.guided_decoding import (
|
||||
get_local_guided_decoding_logits_processor)
|
||||
from vllm.model_executor.layers.sampler import SamplerOutput
|
||||
from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalRegistry
|
||||
from vllm.outputs import (EmbeddingRequestOutput, RequestOutput,
|
||||
RequestOutputFactory)
|
||||
from vllm.pooling_params import PoolingParams
|
||||
@@ -226,6 +227,7 @@ class LLMEngine:
|
||||
usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
|
||||
stat_loggers: Optional[Dict[str, StatLoggerBase]] = None,
|
||||
input_registry: InputRegistry = INPUT_REGISTRY,
|
||||
mm_registry: MultiModalRegistry = MULTIMODAL_REGISTRY,
|
||||
use_cached_outputs: bool = False,
|
||||
) -> None:
|
||||
|
||||
@@ -335,7 +337,8 @@ class LLMEngine:
|
||||
model_config)
|
||||
|
||||
self.input_preprocessor = InputPreprocessor(model_config,
|
||||
self.tokenizer)
|
||||
self.tokenizer,
|
||||
mm_registry)
|
||||
|
||||
self.input_registry = input_registry
|
||||
self.input_processor = input_registry.create_input_processor(
|
||||
@@ -851,13 +854,6 @@ class LLMEngine:
|
||||
)
|
||||
processed_inputs = self.input_processor(preprocessed_inputs)
|
||||
|
||||
# This is a bit of a hack - copy the mm_processor_kwargs that were
|
||||
# used in the input processor to the processed output, since these
|
||||
# kwargs are presumed to be immutable and the values should be aligned
|
||||
# between the input processor (here) and the input mapper.
|
||||
processed_inputs["mm_processor_kwargs"] = preprocessed_inputs.get(
|
||||
"mm_processor_kwargs")
|
||||
|
||||
self._add_processed_request(
|
||||
request_id=request_id,
|
||||
processed_inputs=processed_inputs,
|
||||
@@ -2019,7 +2015,7 @@ class LLMEngine:
|
||||
else:
|
||||
prompt_inputs = inputs
|
||||
|
||||
prompt_ids = prompt_inputs.get("prompt_token_ids")
|
||||
prompt_ids = SingletonInputsAdapter(prompt_inputs).prompt_token_ids
|
||||
|
||||
if prompt_ids is None or len(prompt_ids) == 0:
|
||||
raise ValueError("Prompt cannot be empty")
|
||||
|
||||
Reference in New Issue
Block a user