[Doc] Convert Sphinx directives ({class}, {meth}, {attr}, ...) to MkDocs format for better documentation linking (#18663)
Signed-off-by: Zerohertz <ohg3417@gmail.com>
This commit is contained in:
@@ -475,7 +475,8 @@ class _AsyncLLMEngine(LLMEngine):
|
||||
*,
|
||||
inputs: Optional[PromptType] = None, # DEPRECATED
|
||||
) -> None:
|
||||
"""Async version of {meth}`add_request`."""
|
||||
"""Async version of
|
||||
[`add_request`][vllm.engine.llm_engine.LLMEngine.add_request]."""
|
||||
if inputs is not None:
|
||||
prompt = inputs
|
||||
assert prompt is not None and params is not None
|
||||
@@ -582,20 +583,21 @@ async def build_guided_decoding_logits_processor_async(
|
||||
|
||||
|
||||
class AsyncLLMEngine(EngineClient):
|
||||
"""An asynchronous wrapper for {class}`LLMEngine`.
|
||||
"""An asynchronous wrapper for [`LLMEngine`][vllm.LLMEngine].
|
||||
|
||||
This class is used to wrap the {class}`LLMEngine` class to make it
|
||||
asynchronous. It uses asyncio to create a background loop that keeps
|
||||
processing incoming requests. The {class}`LLMEngine` is kicked by the
|
||||
generate method when there are requests in the waiting queue. The generate
|
||||
method yields the outputs from the {class}`LLMEngine` to the caller.
|
||||
This class is used to wrap the [`LLMEngine`][vllm.LLMEngine] class to
|
||||
make it asynchronous. It uses asyncio to create a background loop that keeps
|
||||
processing incoming requests. The [`LLMEngine`][vllm.LLMEngine] is kicked
|
||||
by the generate method when there are requests in the waiting queue. The
|
||||
generate method yields the outputs from the [`LLMEngine`][vllm.LLMEngine]
|
||||
to the caller.
|
||||
|
||||
Args:
|
||||
log_requests: Whether to log the requests.
|
||||
start_engine_loop: If True, the background task to run the engine
|
||||
will be automatically started in the generate call.
|
||||
*args: Arguments for {class}`LLMEngine`.
|
||||
**kwargs: Arguments for {class}`LLMEngine`.
|
||||
*args: Arguments for [`LLMEngine`][vllm.LLMEngine].
|
||||
**kwargs: Arguments for [`LLMEngine`][vllm.LLMEngine].
|
||||
"""
|
||||
|
||||
_engine_class: Type[_AsyncLLMEngine] = _AsyncLLMEngine
|
||||
@@ -985,8 +987,9 @@ class AsyncLLMEngine(EngineClient):
|
||||
from the LLMEngine to the caller.
|
||||
|
||||
Args:
|
||||
prompt: The prompt to the LLM. See {class}`~vllm.inputs.PromptType`
|
||||
for more details about the format of each input.
|
||||
prompt: The prompt to the LLM. See
|
||||
[`PromptType`][vllm.inputs.PromptType] for more details about
|
||||
the format of each input.
|
||||
sampling_params: The sampling parameters of the request.
|
||||
request_id: The unique id of the request.
|
||||
lora_request: LoRA request to use for generation, if any.
|
||||
@@ -1003,7 +1006,7 @@ class AsyncLLMEngine(EngineClient):
|
||||
Details:
|
||||
- If the engine is not running, start the background loop,
|
||||
which iteratively invokes
|
||||
{meth}`~vllm.engine.async_llm_engine.AsyncLLMEngine.engine_step`
|
||||
[`engine_step`][vllm.engine.async_llm_engine.AsyncLLMEngine.engine_step]
|
||||
to process the waiting requests.
|
||||
- Add the request to the engine's `RequestTracker`.
|
||||
On the next background loop, this request will be sent to
|
||||
@@ -1075,8 +1078,9 @@ class AsyncLLMEngine(EngineClient):
|
||||
from the LLMEngine to the caller.
|
||||
|
||||
Args:
|
||||
prompt: The prompt to the LLM. See {class}`~vllm.inputs.PromptType`
|
||||
for more details about the format of each input.
|
||||
prompt: The prompt to the LLM. See
|
||||
[`PromptType`][vllm.inputs.PromptType] for more details about
|
||||
the format of each input.
|
||||
pooling_params: The pooling parameters of the request.
|
||||
request_id: The unique id of the request.
|
||||
lora_request: LoRA request to use for generation, if any.
|
||||
@@ -1089,15 +1093,15 @@ class AsyncLLMEngine(EngineClient):
|
||||
for the request.
|
||||
|
||||
Details:
|
||||
- If the engine is not running, start the background loop,
|
||||
which iteratively invokes
|
||||
{meth}`~vllm.engine.async_llm_engine.AsyncLLMEngine.engine_step`
|
||||
to process the waiting requests.
|
||||
- Add the request to the engine's `RequestTracker`.
|
||||
On the next background loop, this request will be sent to
|
||||
the underlying engine.
|
||||
Also, a corresponding `AsyncStream` will be created.
|
||||
- Wait for the request outputs from `AsyncStream` and yield them.
|
||||
- If the engine is not running, start the background loop,
|
||||
which iteratively invokes
|
||||
[`engine_step`][vllm.engine.async_llm_engine.AsyncLLMEngine.engine_step]
|
||||
to process the waiting requests.
|
||||
- Add the request to the engine's `RequestTracker`.
|
||||
On the next background loop, this request will be sent to
|
||||
the underlying engine.
|
||||
Also, a corresponding `AsyncStream` will be created.
|
||||
- Wait for the request outputs from `AsyncStream` and yield them.
|
||||
|
||||
Example:
|
||||
```
|
||||
|
||||
Reference in New Issue
Block a user