[Frontend][Bugfix] Disallow extra fields in OpenAI API (#4355)

Cyrus Leung
2024-04-27 13:08:24 +08:00
committed by GitHub
parent 12628d3c78
commit 8947bc3c15
8 changed files with 113 additions and 55 deletions
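
The hunks shown below cover only the serving-layer type cleanups that ride along with this change; the strictness itself is presumably applied to the Pydantic request models in vllm/entrypoints/openai/protocol.py, which this file imports from. A minimal sketch of the mechanism, assuming Pydantic v2 — the model name and fields here are illustrative stand-ins, not the commit's actual code:

# Illustrative sketch only, assuming Pydantic v2 -- not the commit's code.
from pydantic import BaseModel, ConfigDict, ValidationError

class StrictCompletionRequest(BaseModel):
    # extra="forbid" turns unknown fields into validation errors
    # instead of silently dropping them.
    model_config = ConfigDict(extra="forbid")

    model: str
    prompt: str
    max_tokens: int = 16

try:
    # "max_token" is a typo for "max_tokens": previously it would be
    # ignored; with extra="forbid" it is rejected at parse time.
    StrictCompletionRequest(model="m", prompt="hi", max_token=32)
except ValidationError as err:
    print(err)  # reports "Extra inputs are not permitted"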

vllm/entrypoints/openai/serving_completion.py

@@ -11,7 +11,8 @@ from vllm.entrypoints.openai.protocol import (CompletionRequest,
                                               CompletionResponseStreamChoice,
                                               CompletionStreamResponse,
                                               LogProbs, UsageInfo)
-from vllm.entrypoints.openai.serving_engine import LoRA, OpenAIServing
+from vllm.entrypoints.openai.serving_engine import (LoRAModulePath,
+                                                    OpenAIServing)
 from vllm.logger import init_logger
 from vllm.model_executor.guided_decoding import (
     get_guided_decoding_logits_processor)
@@ -54,7 +55,7 @@ class OpenAIServingCompletion(OpenAIServing):
     def __init__(self,
                  engine: AsyncLLMEngine,
                  served_model_names: List[str],
-                 lora_modules: Optional[List[LoRA]] = None):
+                 lora_modules: Optional[List[LoRAModulePath]] = None):
         super().__init__(engine=engine,
                          served_model_names=served_model_names,
                          lora_modules=lora_modules)
@@ -84,7 +85,7 @@ class OpenAIServingCompletion(OpenAIServing):
         created_time = int(time.time())

         # Schedule the request and get the result generator.
-        generators = []
+        generators: List[AsyncIterator[RequestOutput]] = []
         try:
             sampling_params = request.to_sampling_params()
             lora_request = self._maybe_get_lora(request)
@@ -148,7 +149,7 @@ class OpenAIServingCompletion(OpenAIServing):
                                                     num_prompts=len(prompts))

         # Non-streaming response
-        final_res_batch: RequestOutput = [None] * len(prompts)
+        final_res_batch: List[Optional[RequestOutput]] = [None] * len(prompts)
         try:
             async for i, res in result_generator:
                 if await raw_request.is_disconnected():
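
With strict request models in place, a client that sends an unrecognized field should now get a validation error back rather than having the field silently dropped. A hypothetical smoke test against a locally running server — the URL, port, and model name are assumptions:

# Hypothetical end-to-end check; assumes the `requests` package and a
# vLLM OpenAI-compatible server running on the default local address.
import requests

resp = requests.post(
    "http://localhost:8000/v1/completions",
    json={
        "model": "my-model",   # placeholder model name
        "prompt": "Hello",
        "max_token": 32,       # typo for "max_tokens"
    },
)
# FastAPI surfaces Pydantic validation failures as HTTP 422 by default;
# the server may map them to a different status code.
print(resp.status_code, resp.text)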