[Core] Rename input data types (#8688)

2024-10-16 18:49:37 +08:00
parent 1de76a0e55
commit cee711fdbb
32 changed files with 438 additions and 340 deletions
--- a/vllm/sequence.py
+++ b/vllm/sequence.py
@@ -13,8 +13,7 @@ from typing import Set, Tuple, Union, cast
 import msgspec
 import torch

-from vllm.inputs import EncoderDecoderLLMInputs, LLMInputs
-from vllm.inputs.parse import is_valid_encoder_decoder_llm_inputs
+from vllm.inputs.parse import is_encoder_decoder_inputs
 from vllm.lora.request import LoRARequest
 from vllm.pooling_params import PoolingParams
 from vllm.prompt_adapter.request import PromptAdapterRequest
@@ -22,6 +21,7 @@ from vllm.sampling_params import SamplingParams
 from vllm.spec_decode.metrics import SpecDecodeWorkerMetrics

 if TYPE_CHECKING:
+    from vllm.inputs import SingletonInputs
    from vllm.multimodal.base import MultiModalDataDict

 VLLM_TOKEN_ID_ARRAY_TYPE = "l"
@@ -29,6 +29,11 @@ VLLM_TOKEN_ID_ARRAY_TYPE = "l"
 VLLM_INVALID_TOKEN_ID = -1


+def array_full(token_id: int, count: int):
+    """:class:`array` equivalent of :func:`numpy.full`."""
+    return array(VLLM_TOKEN_ID_ARRAY_TYPE, [token_id]) * count
+
+
 # We use dataclass for now because it is used for
 # openai server output, and msgspec is not serializable.
 # TODO(sang): Fix it.
@@ -173,22 +178,34 @@ class SequenceData(msgspec.Struct,
    _mrope_position_delta: Optional[int] = None

    @staticmethod
-    def from_token_counts(*token_counts: Tuple[int, int]) -> "SequenceData":
+    def from_prompt_token_counts(
+            *token_counts: Tuple[int, int]) -> "SequenceData":
+        """
+        Construct a :class:`SequenceData` instance by concatenating
+        prompt token sequences.
+
+        Each tuple represents one token sequence, expressed in the form
+        :code:`(token_id, count)`.
+        """
        if len(token_counts) == 0:
            return SequenceData.from_seqs([])

-        arrs = [
-            array(VLLM_TOKEN_ID_ARRAY_TYPE, [token_id]) * count
-            for token_id, count in token_counts
-        ]
+        prompt_token_ids_arr = reduce(
+            array.__iadd__,
+            (array_full(token_id, count) for token_id, count in token_counts),
+        )

-        return SequenceData(reduce(array.__add__, arrs))
+        return SequenceData(prompt_token_ids_arr)

    @staticmethod
    def from_seqs(
        prompt_token_ids: GenericSequence[int],
        output_token_ids: Optional[GenericSequence[int]] = None,
    ) -> "SequenceData":
+        """
+        Construct a :class:`SequenceData` instance from prompt and output
+        token sequences.
+        """
        prompt_token_ids_arr = array(VLLM_TOKEN_ID_ARRAY_TYPE,
                                     prompt_token_ids)

@@ -362,14 +379,14 @@ class SequenceData(msgspec.Struct,
 class Sequence:
    """Stores the data, status, and block information of a sequence.

-    The sequence is constructed from the LLMInputs instance passed
-    in through the `inputs` constructor argument.
+    The sequence is constructed from the :code:`SingletonInputs` instance
+    passed in through the :code:`inputs` constructor argument.

-    For encoder/decoder models, LLMInputs encapsulates both a
+    For encoder/decoder models, SingletonInputs encapsulates both a
    decoder and encoder prompt, creating an ambiguity about which
    prompt to construct the sequence from. The `from_decoder_prompt`
    constructor argument signals whether to construct the Sequence
-    from the LLMInputs decoder prompt, or encoder prompt.
+    from the SingletonInputs decoder prompt, or encoder prompt.

    Args:
        seq_id: The ID of the sequence.
@@ -379,16 +396,16 @@ class Sequence:
        eos_token_id: The end-of-sequence (EOS) token id recognized by this LLM.
        lora_request: LoRA request.
        prompt_adapter_request: Prompt Adapter request.
-        from_decoder_prompt: Construct Sequence from LLMInputs decoder prompt
-                             (True) or encoder prompt (False.) Must be True
-                             for decoder-only model.
+        from_decoder_prompt: Construct Sequence from SingletonInputs decoder
+                             prompt (True) or encoder prompt (False.) Must be
+                             True for decoder-only model.

    """

    def __init__(
        self,
        seq_id: int,
-        inputs: "LLMInputs",
+        inputs: "SingletonInputs",
        block_size: int,
        eos_token_id: Optional[int] = None,
        lora_request: Optional[LoRARequest] = None,
@@ -404,19 +421,19 @@ class Sequence:
        self.from_decoder_prompt = from_decoder_prompt

        # For decoder-only models, a Sequence is constructed
-        # from an LLMInputs instance (the `inputs` arg.)
+        # from an DecoderOnlyInputs instance (the `inputs` arg.)
        #
        # For encoder/decoder models the same `inputs`
        # instance could be utilized to construct either an
        # encoder sequence or a decoder sequence, because
-        # `LLMInputs` has both decoder- and encoder-oriented
+        # `DecoderOnlyInputs` has both decoder- and encoder-oriented
        # member variables (i.e. it encapsulates both an encoder
        # and a decoder prompt.) The decision of which type of sequence
        # to generate is determined by the `from_decoder_prompt` argument.
        #
        # When constructing a encoder sequence
        # (`from_decoder_prompt` False) it matters that
-        # the `LLMInputs` instance stored in `inputs` is valid
+        # the `DecoderOnlyInputs` instance stored in `inputs` is valid
        # in the sense that its encoder-related member variables are
        # populated; below, an exception is raised if this is
        # not the case.
@@ -424,8 +441,7 @@ class Sequence:
        # When constructing a decoder sequence (`from_decoder_prompt` True)
        # it does not matter whether `inputs` has its encoder-related
        # member variables populated.
-        if not (from_decoder_prompt
-                or is_valid_encoder_decoder_llm_inputs(inputs)):
+        if not (from_decoder_prompt or is_encoder_decoder_inputs(inputs)):
            raise ValueError("Cannot extract encoder input prompt from "
                             f"invalid input {inputs}; did you forget the "
                             "encoder input prompt fields?")
@@ -471,15 +487,19 @@ class Sequence:

    @property
    def multi_modal_data(self) -> "MultiModalDataDict":
-        if self.inputs.get("multi_modal_data") and self.inputs.get(
-                "encoder_multi_modal_data"):
+        inputs = self.inputs
+
+        if (inputs.get("multi_modal_data")
+                and inputs.get("encoder_multi_modal_data")):
            raise ValueError(
                "Multi-modal data in both encoder and decoder is not supported."
            )
-        inputs = self.inputs
-        return self.inputs.get("multi_modal_data") or (cast(
-            EncoderDecoderLLMInputs,
-            inputs).get("encoder_multi_modal_data")) or {}
+
+        return cast(
+            "MultiModalDataDict",
+            (inputs.get("multi_modal_data")
+             or inputs.get("encoder_multi_modal_data") or {}),
+        )

    @property
    def mm_processor_kwargs(self) -> Dict[str, Any]: