[Core] Support serving encoder/decoder models (#7258)

This commit is contained in:
Cyrus Leung
2024-08-09 10:39:41 +08:00
committed by GitHub
parent 0fa14907da
commit 7eb4a51c5f
25 changed files with 603 additions and 464 deletions

View File

@@ -22,7 +22,7 @@ from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
TokenizeCompletionRequest,
TokenizeRequest)
# yapf: enable
-from vllm.inputs import parse_and_batch_prompt
+from vllm.inputs.parse import parse_and_batch_prompt
from vllm.logger import init_logger
from vllm.lora.request import LoRARequest
from vllm.model_executor.guided_decoding import (