[1/N] Initial prototype for multi-modal processor (#10044)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2024-11-13 20:39:03 +08:00
committed by GitHub
parent bb7991aa29
commit 0b8bb86bf1
48 changed files with 1132 additions and 436 deletions

View File

@@ -5,25 +5,21 @@ from abc import ABC, abstractmethod
from array import array
from collections import defaultdict
from dataclasses import dataclass, field
from functools import cached_property, reduce
from typing import (TYPE_CHECKING, Any, Callable, DefaultDict, Dict, List,
Mapping, Optional)
from functools import reduce
from typing import Any, Callable, DefaultDict, Dict, List, Mapping, Optional
from typing import Sequence as GenericSequence
from typing import Set, Tuple, Union
import msgspec
import torch
from typing_extensions import assert_never
from vllm.inputs import SingletonInputs, SingletonInputsAdapter
from vllm.lora.request import LoRARequest
from vllm.multimodal import MultiModalDataDict, MultiModalPlaceholderDict
from vllm.pooling_params import PoolingParams
from vllm.prompt_adapter.request import PromptAdapterRequest
from vllm.sampling_params import RequestOutputKind, SamplingParams
if TYPE_CHECKING:
from vllm.inputs import SingletonInputs
# Typecode string; "l" is the `array` module's code for a signed long.
# NOTE(review): presumably passed to `array(...)` for token-id buffers —
# the call sites are not visible here, confirm before relying on this.
VLLM_TOKEN_ID_ARRAY_TYPE = "l"
# Placeholder value for a token id that is not valid.
# NOTE(review): meaning inferred from the name only; usage is not visible here.
VLLM_INVALID_TOKEN_ID = -1
@@ -407,14 +403,14 @@ class Sequence:
def __init__(
self,
seq_id: int,
inputs: "SingletonInputs",
inputs: SingletonInputs,
block_size: int,
eos_token_id: Optional[int] = None,
lora_request: Optional[LoRARequest] = None,
prompt_adapter_request: Optional[PromptAdapterRequest] = None,
) -> None:
self.seq_id = seq_id
self.inputs = inputs
self.inputs = SingletonInputsAdapter(inputs)
self.block_size = block_size
self.eos_token_id = eos_token_id
self.lora_request = lora_request
@@ -441,59 +437,29 @@ class Sequence:
def n_blocks(self) -> int:
    """Return the sequence length divided by ``block_size``, rounded up.

    The length comes from ``self.get_len()``; adding ``block_size - 1``
    before the floor division makes a partially filled block count as one.
    """
    seq_len = self.get_len()
    per_block = self.block_size
    return (seq_len + per_block - 1) // per_block
@property
def prompt(self) -> Optional[str]:
    """Return the prompt text exposed by ``self.inputs``.

    This is a plain (uncached) property: every access reads
    ``self.inputs.prompt`` again, so it always reflects the current
    ``self.inputs`` object.

    Returns:
        Whatever ``self.inputs.prompt`` yields; annotated as an optional
        string.
    """
    return self.inputs.prompt
@property
def prompt_token_ids(self) -> List[int]:
    """Return the prompt token ids exposed by ``self.inputs``.

    This is a plain (uncached) property that forwards to
    ``self.inputs.prompt_token_ids`` on every access; the returned object
    is the one held by ``self.inputs``, not a copy.

    Returns:
        Whatever ``self.inputs.prompt_token_ids`` yields; annotated as a
        list of ints.
    """
    return self.inputs.prompt_token_ids
@property
def prompt_embeds(self) -> Optional[torch.Tensor]:
    """Return the prompt embeddings exposed by ``self.inputs``.

    This is a plain (uncached) property that forwards to
    ``self.inputs.prompt_embeds`` on every access.

    Returns:
        Whatever ``self.inputs.prompt_embeds`` yields; annotated as an
        optional ``torch.Tensor``.
    """
    return self.inputs.prompt_embeds
@property
def multi_modal_data(self) -> "MultiModalDataDict":
    """Return the multi-modal data exposed by ``self.inputs``.

    This is a plain (uncached) property that forwards to
    ``self.inputs.multi_modal_data`` on every access.

    Returns:
        Whatever ``self.inputs.multi_modal_data`` yields; annotated as a
        ``MultiModalDataDict``.
    """
    return self.inputs.multi_modal_data
@property
def multi_modal_placeholders(self) -> MultiModalPlaceholderDict:
    """Return the multi-modal placeholders exposed by ``self.inputs``.

    This is a plain (uncached) property that forwards to
    ``self.inputs.multi_modal_placeholders`` on every access.

    Returns:
        Whatever ``self.inputs.multi_modal_placeholders`` yields; annotated
        as a ``MultiModalPlaceholderDict``.
    """
    return self.inputs.multi_modal_placeholders
@property
def mm_processor_kwargs(self) -> Dict[str, Any]:
    """Return the multi-modal processor kwargs exposed by ``self.inputs``.

    Forwards to ``self.inputs.mm_processor_kwargs`` on every access.
    """
    wrapped_inputs = self.inputs
    return wrapped_inputs.mm_processor_kwargs
@property
def lora_int_id(self) -> int: