# Source: vllm/vllm/__init__.py (Python, 26 lines, 742 B)

"""vLLM: a high-throughput and memory-efficient inference engine for LLMs"""

from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.engine.llm_engine import LLMEngine
from vllm.entrypoints.llm import LLM
from vllm.executor.ray_utils import initialize_ray_cluster
from vllm.model_executor.models import ModelRegistry
from vllm.outputs import CompletionOutput, RequestOutput
from vllm.sampling_params import SamplingParams

# Version string of this package.
__version__ = "0.4.1"

# Names exported by `from vllm import *`; every entry is imported above.
__all__ = [
    "LLM",
    "ModelRegistry",
    "SamplingParams",
    "RequestOutput",
    "CompletionOutput",
    "LLMEngine",
    "EngineArgs",
    "AsyncLLMEngine",
    "AsyncEngineArgs",
    "initialize_ray_cluster",
]