2023-07-03 11:31:55 -07:00
|
|
|
"""vLLM: a high-throughput and memory-efficient inference engine for LLMs"""
|
|
|
|
|
|
2024-03-08 10:52:20 -08:00
|
|
|
from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
|
|
|
|
|
from vllm.engine.async_llm_engine import AsyncLLMEngine
|
|
|
|
|
from vllm.engine.llm_engine import LLMEngine
|
|
|
|
|
from vllm.entrypoints.llm import LLM
|
2024-04-24 23:52:22 -07:00
|
|
|
from vllm.executor.ray_utils import initialize_ray_cluster
|
2024-04-06 17:11:41 -07:00
|
|
|
from vllm.model_executor.models import ModelRegistry
|
2024-03-08 10:52:20 -08:00
|
|
|
from vllm.outputs import CompletionOutput, RequestOutput
|
|
|
|
|
from vllm.sampling_params import SamplingParams
|
2023-06-17 03:07:40 -07:00
|
|
|
|
2024-04-19 01:00:22 -07:00
|
|
|
# Release version of this package, stored as a plain string literal.
__version__ = "0.4.1"
|
2023-06-17 03:07:40 -07:00
|
|
|
|
|
|
|
|
# Public names exported by a star-import of this module. Each entry is the
# name of an object imported at the top of the file; the trailing comments
# give the module it is imported from. Kept as a single list literal so the
# export list stays readable by static tooling, and in its original order.
__all__ = [
    "LLM",  # vllm.entrypoints.llm
    "ModelRegistry",  # vllm.model_executor.models
    "SamplingParams",  # vllm.sampling_params
    "RequestOutput", "CompletionOutput",  # vllm.outputs
    "LLMEngine",  # vllm.engine.llm_engine
    "EngineArgs",  # vllm.engine.arg_utils
    "AsyncLLMEngine",  # vllm.engine.async_llm_engine
    "AsyncEngineArgs",  # vllm.engine.arg_utils
    "initialize_ray_cluster",  # vllm.executor.ray_utils
]
|