[Core] Support serving encoder/decoder models (#7258)

2024-08-09 10:39:41 +08:00
parent 0fa14907da
commit 7eb4a51c5f
25 changed files with 603 additions and 464 deletions
--- a/vllm/entrypoints/openai/serving_engine.py
+++ b/vllm/entrypoints/openai/serving_engine.py
@@ -22,7 +22,7 @@ from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
                                              TokenizeCompletionRequest,
                                              TokenizeRequest)
 # yapf: enable
-from vllm.inputs import parse_and_batch_prompt
+from vllm.inputs.parse import parse_and_batch_prompt
 from vllm.logger import init_logger
 from vllm.lora.request import LoRARequest
 from vllm.model_executor.guided_decoding import (