[1/N] Initial prototype for multi-modal processor (#10044)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -5,25 +5,21 @@ from abc import ABC, abstractmethod
|
||||
from array import array
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass, field
|
||||
from functools import cached_property, reduce
|
||||
from typing import (TYPE_CHECKING, Any, Callable, DefaultDict, Dict, List,
|
||||
Mapping, Optional)
|
||||
from functools import reduce
|
||||
from typing import Any, Callable, DefaultDict, Dict, List, Mapping, Optional
|
||||
from typing import Sequence as GenericSequence
|
||||
from typing import Set, Tuple, Union
|
||||
|
||||
import msgspec
|
||||
import torch
|
||||
from typing_extensions import assert_never
|
||||
|
||||
from vllm.inputs import SingletonInputs, SingletonInputsAdapter
|
||||
from vllm.lora.request import LoRARequest
|
||||
from vllm.multimodal import MultiModalDataDict, MultiModalPlaceholderDict
|
||||
from vllm.pooling_params import PoolingParams
|
||||
from vllm.prompt_adapter.request import PromptAdapterRequest
|
||||
from vllm.sampling_params import RequestOutputKind, SamplingParams
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from vllm.inputs import SingletonInputs
|
||||
|
||||
VLLM_TOKEN_ID_ARRAY_TYPE = "l"
|
||||
|
||||
VLLM_INVALID_TOKEN_ID = -1
|
||||
@@ -407,14 +403,14 @@ class Sequence:
|
||||
def __init__(
|
||||
self,
|
||||
seq_id: int,
|
||||
inputs: "SingletonInputs",
|
||||
inputs: SingletonInputs,
|
||||
block_size: int,
|
||||
eos_token_id: Optional[int] = None,
|
||||
lora_request: Optional[LoRARequest] = None,
|
||||
prompt_adapter_request: Optional[PromptAdapterRequest] = None,
|
||||
) -> None:
|
||||
self.seq_id = seq_id
|
||||
self.inputs = inputs
|
||||
self.inputs = SingletonInputsAdapter(inputs)
|
||||
self.block_size = block_size
|
||||
self.eos_token_id = eos_token_id
|
||||
self.lora_request = lora_request
|
||||
@@ -441,59 +437,29 @@ class Sequence:
|
||||
def n_blocks(self) -> int:
|
||||
return (self.get_len() + self.block_size - 1) // self.block_size
|
||||
|
||||
@cached_property
|
||||
@property
|
||||
def prompt(self) -> Optional[str]:
|
||||
inputs = self.inputs
|
||||
return self.inputs.prompt
|
||||
|
||||
if inputs["type"] == "token":
|
||||
return inputs.get("prompt")
|
||||
|
||||
assert_never(inputs)
|
||||
|
||||
@cached_property
|
||||
@property
|
||||
def prompt_token_ids(self) -> List[int]:
|
||||
inputs = self.inputs
|
||||
return self.inputs.prompt_token_ids
|
||||
|
||||
if inputs["type"] == "token":
|
||||
return inputs.get("prompt_token_ids", [])
|
||||
|
||||
assert_never(inputs)
|
||||
|
||||
@cached_property
|
||||
@property
|
||||
def prompt_embeds(self) -> Optional[torch.Tensor]:
|
||||
inputs = self.inputs
|
||||
return self.inputs.prompt_embeds
|
||||
|
||||
if inputs["type"] == "token":
|
||||
return None
|
||||
|
||||
assert_never(inputs)
|
||||
|
||||
@cached_property
|
||||
@property
|
||||
def multi_modal_data(self) -> "MultiModalDataDict":
|
||||
inputs = self.inputs
|
||||
|
||||
if inputs["type"] == "token":
|
||||
return inputs.get("multi_modal_data", {})
|
||||
|
||||
assert_never(inputs)
|
||||
|
||||
@cached_property
|
||||
def mm_processor_kwargs(self) -> Dict[str, Any]:
|
||||
inputs = self.inputs
|
||||
|
||||
if inputs["type"] == "token":
|
||||
return inputs.get("mm_processor_kwargs", {})
|
||||
|
||||
assert_never(inputs)
|
||||
return self.inputs.multi_modal_data
|
||||
|
||||
@property
|
||||
def multi_modal_placeholders(self) -> MultiModalPlaceholderDict:
|
||||
inputs = self.inputs
|
||||
return self.inputs.multi_modal_placeholders
|
||||
|
||||
if inputs["type"] == "token":
|
||||
return inputs.get("multi_modal_placeholders", {})
|
||||
|
||||
assert_never(inputs)
|
||||
@property
|
||||
def mm_processor_kwargs(self) -> Dict[str, Any]:
|
||||
return self.inputs.mm_processor_kwargs
|
||||
|
||||
@property
|
||||
def lora_int_id(self) -> int:
|
||||
|
||||
Reference in New Issue
Block a user