[Core] [Frontend] Priority scheduling for embeddings and in the OpenAI-API (#8965)
This commit is contained in:
committed by
GitHub
parent
1fe0a4264a
commit
35bd215168
@@ -235,6 +235,7 @@ class OpenAIServingChat(OpenAIServing):
|
||||
lora_request=lora_request,
|
||||
trace_headers=trace_headers,
|
||||
prompt_adapter_request=prompt_adapter_request,
|
||||
priority=request.priority,
|
||||
)
|
||||
except ValueError as e:
|
||||
# TODO: Use a vllm-specific Validation Error
|
||||
|
||||
Reference in New Issue
Block a user