[Core] [Frontend] Priority scheduling for embeddings and in the OpenAI-API (#8965)

This commit is contained in:
Sebastian Schoennenbeck
2024-10-01 11:58:06 +02:00
committed by GitHub
parent 1fe0a4264a
commit 35bd215168
8 changed files with 53 additions and 5 deletions

View File

@@ -148,6 +148,7 @@ class OpenAIServingCompletion(OpenAIServing):
lora_request=lora_request,
prompt_adapter_request=prompt_adapter_request,
trace_headers=trace_headers,
priority=request.priority,
)
generators.append(generator)