[Frontend] merge beam search implementations (#9296)

This commit is contained in:
Brendan Wong
2024-10-14 15:05:52 -07:00
committed by GitHub
parent 473e7b3606
commit 4d31cd424b
5 changed files with 145 additions and 234 deletions

View File

@@ -9,8 +9,6 @@ from typing import Union
from fastapi import Request
from vllm.config import ModelConfig
from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.engine.multiprocessing.client import MQLLMEngineClient
from vllm.engine.protocol import EngineClient
from vllm.entrypoints.chat_utils import (ConversationMessage,
apply_hf_chat_template,
@@ -237,11 +235,6 @@ class OpenAIServingChat(OpenAIServing):
log_tracing_disabled_warning()
if isinstance(sampling_params, BeamSearchParams):
assert isinstance(self.engine_client,
(AsyncLLMEngine,
MQLLMEngineClient)), \
"Beam search is only supported with" \
"AsyncLLMEngine and MQLLMEngineClient."
result_generator = self.engine_client.beam_search(
engine_inputs['prompt_token_ids'],
request_id,