Add full API docs and improve the UX of navigating them (#17485)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Harry Mellor, 2025-05-04 03:42:43 +01:00 (committed by GitHub)
parent 46fae69cf0
commit d6484ef3c3
101 changed files with 872 additions and 980 deletions

View File

@@ -10,7 +10,7 @@ from .registry import (DummyData, InputContext, InputProcessingContext,
INPUT_REGISTRY = InputRegistry()
"""
-The global :class:`~InputRegistry` which is used by :class:`~vllm.LLMEngine`
+The global {class}`~InputRegistry` which is used by {class}`~vllm.LLMEngine`
to dispatch data processing according to the target model.
"""

View File

@@ -80,22 +80,22 @@ SingletonPrompt = Union[str, TextPrompt, TokensPrompt, EmbedsPrompt]
"""
Set of possible schemas for a single prompt:
-- A text prompt (:class:`str` or :class:`TextPrompt`)
-- A tokenized prompt (:class:`TokensPrompt`)
-- An embeddings prompt (:class:`EmbedsPrompt`)
+- A text prompt ({class}`str` or {class}`TextPrompt`)
+- A tokenized prompt ({class}`TokensPrompt`)
+- An embeddings prompt ({class}`EmbedsPrompt`)
Note that "singleton" is as opposed to a data structure
which encapsulates multiple prompts, i.e. of the sort
which may be utilized for encoder/decoder models when
the user desires to express both the encoder & decoder
-prompts explicitly, i.e. :class:`ExplicitEncoderDecoderPrompt`
+prompts explicitly, i.e. {class}`ExplicitEncoderDecoderPrompt`
-A prompt of type :class:`SingletonPrompt` may be employed
+A prompt of type {class}`SingletonPrompt` may be employed
as (1) input to a decoder-only model, (2) input to
the encoder of an encoder/decoder model, in the scenario
where the decoder-prompt is not specified explicitly, or
(3) as a member of a larger data structure encapsulating
-more than one prompt, i.e. :class:`ExplicitEncoderDecoderPrompt`
+more than one prompt, i.e. {class}`ExplicitEncoderDecoderPrompt`
"""
_T1_co = TypeVar("_T1_co",
@@ -115,18 +115,18 @@ class ExplicitEncoderDecoderPrompt(TypedDict, Generic[_T1_co, _T2_co]):
comprising an explicit encoder prompt and a decoder prompt.
The encoder and decoder prompts, respectively, may be formatted
-according to any of the :class:`SingletonPrompt` schemas,
+according to any of the {class}`SingletonPrompt` schemas,
and are not required to have the same schema.
Only the encoder prompt may have multi-modal data. mm_processor_kwargs
should be at the top-level, and should not be set in the encoder/decoder
prompts, since they are agnostic to the encoder/decoder.
-Note that an :class:`ExplicitEncoderDecoderPrompt` may not
+Note that an {class}`ExplicitEncoderDecoderPrompt` may not
be used as an input to a decoder-only model,
-and that the :code:`encoder_prompt` and :code:`decoder_prompt`
+and that the `encoder_prompt` and `decoder_prompt`
fields of this data structure themselves must be
-:class:`SingletonPrompt` instances.
+{class}`SingletonPrompt` instances.
"""
encoder_prompt: _T1_co
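As an illustration (not part of the diff), an explicit encoder/decoder prompt can be assembled along these lines, with each member being any SingletonPrompt schema:

from vllm.inputs import ExplicitEncoderDecoderPrompt, TokensPrompt

enc_dec_prompt = ExplicitEncoderDecoderPrompt(
    encoder_prompt="Passage for the encoder",            # text schema
    decoder_prompt=TokensPrompt(prompt_token_ids=[0]),   # token schema
    # Per the note above, any mm_processor_kwargs belong at this top level,
    # not inside the encoder/decoder members.
)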
@@ -141,11 +141,11 @@ PromptType = Union[SingletonPrompt, ExplicitEncoderDecoderPrompt]
Set of possible schemas for an LLM input, including
both decoder-only and encoder/decoder input types:
-- A text prompt (:class:`str` or :class:`TextPrompt`)
-- A tokenized prompt (:class:`TokensPrompt`)
-- An embeddings prompt (:class:`EmbedsPrompt`)
+- A text prompt ({class}`str` or {class}`TextPrompt`)
+- A tokenized prompt ({class}`TokensPrompt`)
+- An embeddings prompt ({class}`EmbedsPrompt`)
- A single data structure containing both an encoder and a decoder prompt
-(:class:`ExplicitEncoderDecoderPrompt`)
+({class}`ExplicitEncoderDecoderPrompt`)
"""
@@ -178,7 +178,7 @@ def token_inputs(
prompt: Optional[str] = None,
cache_salt: Optional[str] = None,
) -> TokenInputs:
"""Construct :class:`TokenInputs` from optional values."""
"""Construct {class}`TokenInputs` from optional values."""
inputs = TokenInputs(type="token", prompt_token_ids=prompt_token_ids)
if prompt is not None:
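A brief usage sketch for the constructor above (a hedged example; cache_salt and multi-modal handling are omitted):

from vllm.inputs import token_inputs

inputs = token_inputs(prompt_token_ids=[101, 7592, 102], prompt="hello")
assert inputs["type"] == "token"
assert inputs["prompt_token_ids"] == [101, 7592, 102]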
@@ -221,7 +221,7 @@ def embeds_inputs(
DecoderOnlyInputs = Union[TokenInputs, EmbedsInputs, "MultiModalInputs"]
"""
-The inputs in :class:`~vllm.LLMEngine` before they are
+The inputs in {class}`~vllm.LLMEngine` before they are
passed to the model executor.
This specifies the data required for decoder-only models.
"""
@@ -229,7 +229,7 @@ This specifies the data required for decoder-only models.
class EncoderDecoderInputs(TypedDict):
"""
-The inputs in :class:`~vllm.LLMEngine` before they are
+The inputs in {class}`~vllm.LLMEngine` before they are
passed to the model executor.
This specifies the required data for encoder-decoder models.
@@ -243,13 +243,13 @@ class EncoderDecoderInputs(TypedDict):
SingletonInputs = Union[TokenInputs, EmbedsInputs, "MultiModalInputs"]
"""
-A processed :class:`SingletonPrompt` which can be passed to
-:class:`vllm.sequence.Sequence`.
+A processed {class}`SingletonPrompt` which can be passed to
+{class}`vllm.sequence.Sequence`.
"""
ProcessorInputs = Union[DecoderOnlyInputs, EncoderDecoderInputs]
"""
-The inputs to :data:`vllm.inputs.InputProcessor`.
+The inputs to {data}`vllm.inputs.InputProcessor`.
"""
_T1 = TypeVar("_T1", bound=SingletonPrompt, default=SingletonPrompt)
@@ -277,7 +277,7 @@ def zip_enc_dec_prompts(
) -> list[ExplicitEncoderDecoderPrompt[_T1, _T2]]:
"""
Zip encoder and decoder prompts together into a list of
-:class:`ExplicitEncoderDecoderPrompt` instances.
+{class}`ExplicitEncoderDecoderPrompt` instances.
``mm_processor_kwargs`` may also be provided; if a dict is passed, the same
dictionary will be used for every encoder/decoder prompt. If an iterable is
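A rough usage sketch for this helper (not part of the diff); each returned element is an ExplicitEncoderDecoderPrompt dict pairing the prompts positionally:

from vllm.inputs import zip_enc_dec_prompts

pairs = zip_enc_dec_prompts(
    ["encode this", "and this"],          # encoder prompts
    ["decode from it", "and from that"],  # decoder prompts
)
# pairs[0]["encoder_prompt"] == "encode this"
# pairs[0]["decoder_prompt"] == "decode from it"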

View File

@@ -224,7 +224,7 @@ class InputPreprocessor:
lora_request: Optional[LoRARequest],
tokenization_kwargs: Optional[dict[str, Any]] = None,
) -> list[int]:
"""Async version of :meth:`_tokenize_prompt`."""
"""Async version of {meth}`_tokenize_prompt`."""
tokenizer = self.get_tokenizer_group()
tokenization_kwargs = self._get_tokenization_kw(tokenization_kwargs)
@@ -287,7 +287,7 @@ class InputPreprocessor:
lora_request: Optional[LoRARequest],
return_mm_hashes: bool = False,
) -> MultiModalInputs:
"""Async version of :meth:`_process_multimodal`."""
"""Async version of {meth}`_process_multimodal`."""
tokenizer = await self._get_mm_tokenizer_async(lora_request)
mm_processor = self.mm_registry.create_processor(self.model_config,
@@ -472,7 +472,7 @@ class InputPreprocessor:
Returns:
-* :class:`SingletonInputs` instance
+* {class}`SingletonInputs` instance
"""
parsed = parse_singleton_prompt(prompt)
@@ -508,7 +508,7 @@ class InputPreprocessor:
lora_request: Optional[LoRARequest] = None,
return_mm_hashes: bool = False,
) -> SingletonInputs:
"""Async version of :meth:`_prompt_to_llm_inputs`."""
"""Async version of {meth}`_prompt_to_llm_inputs`."""
parsed = parse_singleton_prompt(prompt)
if parsed["type"] == "embeds":
@@ -644,7 +644,7 @@ class InputPreprocessor:
) -> EncoderDecoderInputs:
"""
For encoder/decoder models only:
-Process an input prompt into an :class:`EncoderDecoderInputs` instance.
+Process an input prompt into an {class}`EncoderDecoderInputs` instance.
There are two types of input prompts:
singleton prompts which carry only the
@@ -670,7 +670,7 @@ class InputPreprocessor:
Returns:
-* :class:`EncoderDecoderInputs` instance
+* {class}`EncoderDecoderInputs` instance
"""
encoder_inputs: SingletonInputs
decoder_inputs: Optional[SingletonInputs]
@@ -710,7 +710,7 @@ class InputPreprocessor:
prompt: PromptType,
tokenization_kwargs: Optional[dict[str, Any]] = None,
) -> EncoderDecoderInputs:
"""Async version of :meth:`_process_encoder_decoder_prompt`."""
"""Async version of {meth}`_process_encoder_decoder_prompt`."""
encoder_inputs: SingletonInputs
decoder_inputs: Optional[SingletonInputs]
@@ -778,7 +778,7 @@ class InputPreprocessor:
) -> DecoderOnlyInputs:
"""
For decoder-only models:
-Process an input prompt into an :class:`DecoderOnlyInputs` instance.
+Process an input prompt into an {class}`DecoderOnlyInputs` instance.
Arguments:
@@ -789,7 +789,7 @@ class InputPreprocessor:
Returns:
-* :class:`DecoderOnlyInputs` instance
+* {class}`DecoderOnlyInputs` instance
"""
prompt_comps = self._prompt_to_llm_inputs(
@@ -812,7 +812,7 @@ class InputPreprocessor:
prompt_adapter_request: Optional[PromptAdapterRequest] = None,
return_mm_hashes: bool = False,
) -> DecoderOnlyInputs:
"""Async version of :meth:`_process_decoder_only_prompt`."""
"""Async version of {meth}`_process_decoder_only_prompt`."""
prompt_comps = await self._prompt_to_llm_inputs_async(
prompt,
tokenization_kwargs=tokenization_kwargs,
@@ -863,7 +863,7 @@ class InputPreprocessor:
prompt_adapter_request: Optional[PromptAdapterRequest] = None,
return_mm_hashes: bool = False,
) -> ProcessorInputs:
"""Async version of :meth:`preprocess`."""
"""Async version of {meth}`preprocess`."""
if self.model_config.is_encoder_decoder:
assert not return_mm_hashes, (
"Multimodal hashes for encoder-decoder models should not be ",

View File

@@ -38,7 +38,7 @@ class InputContext:
) -> _C:
"""
Get the HuggingFace configuration
-(:class:`transformers.PretrainedConfig`) of the model,
+({class}`transformers.PretrainedConfig`) of the model,
additionally checking its type.
Raises:
@@ -79,7 +79,7 @@ class InputContext:
) -> _P:
"""
Get the HuggingFace processor
-(:class:`transformers.ProcessorMixin`) of the model,
+({class}`transformers.ProcessorMixin`) of the model,
additionally checking its type.
Raises:
@@ -135,8 +135,8 @@ class InputProcessingContext(InputContext):
kwargs: Mapping[str, object] = {},
) -> BatchFeature:
"""
-Call :code:`hf_processor` on the prompt :code:`data`
-(text, image, audio...) with configurable options :code:`kwargs`.
+Call `hf_processor` on the prompt `data`
+(text, image, audio...) with configurable options `kwargs`.
"""
assert callable(hf_processor)
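An illustrative sketch of the typed-accessor pattern described above (assuming the accessor documented in the first hunk is InputContext.get_hf_config and the caller holds a context for a LLaVA-style model; the config class is only an example):

from transformers import LlavaConfig
from vllm.inputs import InputContext

def get_vision_config(ctx: InputContext):
    # Raises TypeError if the model's config is not a LlavaConfig,
    # matching the "additionally checking its type" behaviour noted above.
    hf_config = ctx.get_hf_config(LlavaConfig)
    return hf_config.vision_config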