[Refactor] [7/N] to simplify the vLLM lora serving architecture (#32251)
Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
This commit is contained in:
@@ -10,10 +10,12 @@ from vllm.config import ModelConfig
|
||||
from vllm.engine.protocol import EngineClient
|
||||
from vllm.entrypoints.openai.engine.protocol import (
|
||||
ErrorResponse,
|
||||
)
|
||||
from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels
|
||||
from vllm.entrypoints.serve.lora.protocol import (
|
||||
LoadLoRAAdapterRequest,
|
||||
UnloadLoRAAdapterRequest,
|
||||
)
|
||||
from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels
|
||||
from vllm.lora.request import LoRARequest
|
||||
|
||||
MODEL_NAME = "hmellor/tiny-random-LlamaForCausalLM"
|
||||
|
||||
@@ -1247,16 +1247,6 @@ StreamingResponsesResponse: TypeAlias = (
|
||||
)
|
||||
|
||||
|
||||
class LoadLoRAAdapterRequest(BaseModel):
    """Request body for dynamically loading a LoRA adapter into the engine."""

    # Name under which the adapter will be registered and served.
    lora_name: str
    # Path (local or artifact reference) to the adapter weights.
    lora_path: str
|
||||
|
||||
|
||||
class UnloadLoRAAdapterRequest(BaseModel):
    """Request body for unloading a previously loaded LoRA adapter."""

    # Name the adapter was registered under at load time.
    lora_name: str
    # Optional numeric adapter id; resolved from lora_name when omitted.
    # Plain `= None` default is equivalent to `Field(default=None)` in pydantic.
    lora_int_id: int | None = None
|
||||
|
||||
|
||||
## Protocols for Audio
|
||||
AudioResponseFormat: TypeAlias = Literal["json", "text", "srt", "verbose_json", "vtt"]
|
||||
|
||||
|
||||
@@ -10,10 +10,12 @@ from vllm.engine.protocol import EngineClient
|
||||
from vllm.entrypoints.openai.engine.protocol import (
|
||||
ErrorInfo,
|
||||
ErrorResponse,
|
||||
LoadLoRAAdapterRequest,
|
||||
ModelCard,
|
||||
ModelList,
|
||||
ModelPermission,
|
||||
)
|
||||
from vllm.entrypoints.serve.lora.protocol import (
|
||||
LoadLoRAAdapterRequest,
|
||||
UnloadLoRAAdapterRequest,
|
||||
)
|
||||
from vllm.logger import init_logger
|
||||
|
||||
@@ -10,10 +10,12 @@ from vllm import envs
|
||||
from vllm.entrypoints.openai.api_server import models, validate_json_request
|
||||
from vllm.entrypoints.openai.engine.protocol import (
|
||||
ErrorResponse,
|
||||
)
|
||||
from vllm.entrypoints.openai.serving_models import OpenAIServingModels
|
||||
from vllm.entrypoints.serve.lora.protocol import (
|
||||
LoadLoRAAdapterRequest,
|
||||
UnloadLoRAAdapterRequest,
|
||||
)
|
||||
from vllm.entrypoints.openai.serving_models import OpenAIServingModels
|
||||
from vllm.logger import init_logger
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
14
vllm/entrypoints/serve/lora/protocol.py
Normal file
14
vllm/entrypoints/serve/lora/protocol.py
Normal file
@@ -0,0 +1,14 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class LoadLoRAAdapterRequest(BaseModel):
    """Request body for dynamically loading a LoRA adapter into the engine."""

    # Name under which the adapter will be registered and served.
    lora_name: str
    # Path (local or artifact reference) to the adapter weights.
    lora_path: str
|
||||
|
||||
|
||||
class UnloadLoRAAdapterRequest(BaseModel):
    """Request body for unloading a previously loaded LoRA adapter."""

    # Name the adapter was registered under at load time.
    lora_name: str
    # Optional numeric adapter id; resolved from lora_name when omitted.
    # Plain `= None` default is equivalent to `Field(default=None)` in pydantic.
    lora_int_id: int | None = None
|
||||
Reference in New Issue
Block a user