Add full API docs and improve the UX of navigating them (#17485)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -170,9 +170,10 @@ class Worker(WorkerBase):
|
||||
Then, it calculates the free memory that can be used for KV cache in
|
||||
bytes.
|
||||
|
||||
.. tip::
|
||||
You may limit the usage of GPU memory
|
||||
by adjusting the `gpu_memory_utilization` parameter.
|
||||
:::{tip}
|
||||
You may limit the usage of GPU memory
|
||||
by adjusting the `gpu_memory_utilization` parameter.
|
||||
:::
|
||||
"""
|
||||
torch.cuda.empty_cache()
|
||||
torch.cuda.reset_peak_memory_stats()
|
||||
|
||||
@@ -10,7 +10,7 @@ def sanity_check_mm_encoder_outputs(
|
||||
) -> None:
|
||||
"""
|
||||
Perform sanity checks for the result of
|
||||
:meth:`vllm.model_executor.models.SupportsMultiModal.get_multimodal_embeddings`.
|
||||
{meth}`vllm.model_executor.models.SupportsMultiModal.get_multimodal_embeddings`.
|
||||
"""
|
||||
assert isinstance(mm_embeddings, (list, tuple, torch.Tensor)), (
|
||||
"Expected multimodal embeddings to be a list/tuple of 2D tensors, "
|
||||
@@ -39,7 +39,7 @@ def scatter_mm_placeholders(
|
||||
Scatter the multimodal embeddings into a contiguous tensor that represents
|
||||
the placeholder tokens.
|
||||
|
||||
:class:`vllm.multimodal.processing.PromptUpdateDetails.is_embed`.
|
||||
{class}`vllm.multimodal.processing.PromptUpdateDetails.is_embed`.
|
||||
|
||||
Args:
|
||||
embeds: The multimodal embeddings.
|
||||
@@ -66,7 +66,7 @@ def gather_mm_placeholders(
|
||||
"""
|
||||
Reconstructs the embeddings from the placeholder tokens.
|
||||
|
||||
This is the inverse operation of :func:`scatter_mm_placeholders`.
|
||||
This is the inverse operation of {func}`scatter_mm_placeholders`.
|
||||
"""
|
||||
if is_embed is None:
|
||||
return placeholders
|
||||
|
||||
Reference in New Issue
Block a user