[1/N] Initial prototype for multi-modal processor (#10044)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2024-11-13 20:39:03 +08:00
committed by GitHub
parent bb7991aa29
commit 0b8bb86bf1
48 changed files with 1132 additions and 436 deletions

View File

@@ -30,7 +30,7 @@ from vllm.executor.executor_base import ExecutorBase
from vllm.executor.gpu_executor import GPUExecutor
from vllm.executor.ray_utils import initialize_ray_cluster
from vllm.inputs import (INPUT_REGISTRY, InputRegistry, ProcessorInputs,
PromptType)
PromptType, SingletonInputsAdapter)
from vllm.inputs.parse import is_encoder_decoder_inputs, is_token_prompt
from vllm.inputs.preprocess import InputPreprocessor
from vllm.logger import init_logger
@@ -39,6 +39,7 @@ from vllm.lora.request import LoRARequest
from vllm.model_executor.guided_decoding import (
get_local_guided_decoding_logits_processor)
from vllm.model_executor.layers.sampler import SamplerOutput
from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalRegistry
from vllm.outputs import (EmbeddingRequestOutput, RequestOutput,
RequestOutputFactory)
from vllm.pooling_params import PoolingParams
@@ -226,6 +227,7 @@ class LLMEngine:
usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
stat_loggers: Optional[Dict[str, StatLoggerBase]] = None,
input_registry: InputRegistry = INPUT_REGISTRY,
mm_registry: MultiModalRegistry = MULTIMODAL_REGISTRY,
use_cached_outputs: bool = False,
) -> None:
@@ -335,7 +337,8 @@ class LLMEngine:
model_config)
self.input_preprocessor = InputPreprocessor(model_config,
self.tokenizer)
self.tokenizer,
mm_registry)
self.input_registry = input_registry
self.input_processor = input_registry.create_input_processor(
@@ -851,13 +854,6 @@ class LLMEngine:
)
processed_inputs = self.input_processor(preprocessed_inputs)
# This is a bit of a hack - copy the mm_processor_kwargs that were
# used in the input processor to the processed output, since these
# kwargs are presumed to be immutable and the values should be aligned
# between the input processor (here) and the input mapper.
processed_inputs["mm_processor_kwargs"] = preprocessed_inputs.get(
"mm_processor_kwargs")
self._add_processed_request(
request_id=request_id,
processed_inputs=processed_inputs,
@@ -2019,7 +2015,7 @@ class LLMEngine:
else:
prompt_inputs = inputs
prompt_ids = prompt_inputs.get("prompt_token_ids")
prompt_ids = SingletonInputsAdapter(prompt_inputs).prompt_token_ids
if prompt_ids is None or len(prompt_ids) == 0:
raise ValueError("Prompt cannot be empty")