[Refactor] [7/N] Simplify the vLLM LoRA serving architecture (#32251)

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
2026-01-13 23:37:34 +08:00
parent 252c011012
commit 4f02cb2eac
5 changed files with 23 additions and 13 deletions
--- a/tests/entrypoints/openai/test_serving_models.py
+++ b/tests/entrypoints/openai/test_serving_models.py
@@ -10,10 +10,12 @@ from vllm.config import ModelConfig
 from vllm.engine.protocol import EngineClient
 from vllm.entrypoints.openai.engine.protocol import (
    ErrorResponse,
+)
+from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels
+from vllm.entrypoints.serve.lora.protocol import (
    LoadLoRAAdapterRequest,
    UnloadLoRAAdapterRequest,
 )
-from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels
 from vllm.lora.request import LoRARequest

 MODEL_NAME = "hmellor/tiny-random-LlamaForCausalLM"