[DOC] Add additional comments for LLMEngine and AsyncLLMEngine (#1011)
This commit is contained in:
@@ -253,7 +253,8 @@ class AsyncLLMEngine:
|
||||
log_requests: Whether to log the requests.
|
||||
start_engine_loop: If True, the background task to run the engine
|
||||
will be automatically started in the generate call.
|
||||
*args, *kwargs: Arguments for LLMEngine.
|
||||
*args: Arguments for LLMEngine.
|
||||
*kwargs: Arguments for LLMEngine.
|
||||
"""
|
||||
|
||||
_engine_class: Type[_AsyncLLMEngine] = _AsyncLLMEngine
|
||||
@@ -428,6 +429,49 @@ class AsyncLLMEngine:
|
||||
Yields:
|
||||
The output `RequestOutput` objects from the LLMEngine for the
|
||||
request.
|
||||
|
||||
Details:
|
||||
- If the engine is not running, start the background loop,
|
||||
which iteratively invokes
|
||||
:meth:`~vllm.engine.async_llm_engine.AsyncLLMEngine.engine_step`
|
||||
to process the waiting requests.
|
||||
- Add the request to the engine's `RequestTracker`.
|
||||
On the next background loop, this request will be sent to
|
||||
the underlying engine.
|
||||
Also, a corresponding `AsyncStream` will be created.
|
||||
- Wait for the request outputs from `AsyncStream` and yield them.
|
||||
|
||||
Example:
|
||||
>>> # Please refer to entrypoints/api_server.py for
|
||||
>>> # the complete example.
|
||||
>>>
|
||||
>>> # initialize the engine and the example input
|
||||
>>> engine = AsyncLLMEngine.from_engine_args(engine_args)
|
||||
>>> example_input = {
|
||||
>>> "prompt": "What is LLM?",
|
||||
>>> "stream": False, # assume the non-streaming case
|
||||
>>> "temperature": 0.0,
|
||||
>>> "request_id": 0,
|
||||
>>> }
|
||||
>>>
|
||||
>>> # start the generation
|
||||
>>> results_generator = engine.generate(
|
||||
>>> example_input["prompt"],
|
||||
>>> SamplingParams(temperature=example_input["temperature"]),
|
||||
>>> example_input["request_id"])
|
||||
>>>
|
||||
>>> # get the results
|
||||
>>> final_output = None
|
||||
>>> async for request_output in results_generator:
|
||||
>>> if await request.is_disconnected():
|
||||
>>> # Abort the request if the client disconnects.
|
||||
>>> await engine.abort(request_id)
|
||||
>>> # Return or raise an error
|
||||
>>> ...
|
||||
>>> final_output = request_output
|
||||
>>>
|
||||
>>> # Process and return the final output
|
||||
>>> ...
|
||||
"""
|
||||
# Preprocess the request.
|
||||
# This should not be used for logging, as it is monotonic time.
|
||||
|
||||
Reference in New Issue
Block a user