[Core] Adding Priority Scheduling (#5958)

This commit is contained in:
Archit Patke
2024-09-24 21:50:50 -05:00
committed by GitHub
parent 01b6f9e1f0
commit 6da1ab6b41
6 changed files with 410 additions and 8 deletions

View File

@@ -320,7 +320,8 @@ class LLM:
lora_request: Optional[Union[List[LoRARequest], LoRARequest]] = None,
prompt_adapter_request: Optional[PromptAdapterRequest] = None,
guided_options_request: Optional[Union[LLMGuidedOptions,
GuidedDecodingRequest]] = None
GuidedDecodingRequest]] = None,
priority: Optional[List[int]] = None,
) -> List[RequestOutput]:
"""Generates the completions for the input prompts.
@@ -339,6 +340,8 @@ class LLM:
lora_request: LoRA request to use for generation, if any.
prompt_adapter_request: Prompt Adapter request to use for
generation, if any.
priority: A list of per-request priorities, if any, indexed in the
same order as the prompts. Requests get priority 0 when omitted.
Only applicable when priority scheduling policy is enabled.
Returns:
A list of ``RequestOutput`` objects containing the
@@ -379,7 +382,8 @@ class LLM:
params=sampling_params,
lora_request=lora_request,
prompt_adapter_request=prompt_adapter_request,
guided_options=guided_options_request)
guided_options=guided_options_request,
priority=priority)
outputs = self._run_engine(use_tqdm=use_tqdm)
return LLMEngine.validate_outputs(outputs, RequestOutput)
@@ -782,6 +786,7 @@ class LLM:
lora_request: Optional[Union[Sequence[LoRARequest], LoRARequest]],
prompt_adapter_request: Optional[PromptAdapterRequest],
guided_options: Optional[GuidedDecodingRequest] = None,
priority: Optional[List[int]] = None,
) -> None:
if isinstance(inputs, (str, dict)):
# Convert a single prompt to a list.
@@ -811,6 +816,7 @@ class LLM:
lora_request=lora_request[i] if isinstance(
lora_request, Sequence) else lora_request,
prompt_adapter_request=prompt_adapter_request,
priority=priority[i] if priority else 0,
)
def _add_request(
@@ -819,6 +825,7 @@ class LLM:
params: Union[SamplingParams, PoolingParams],
lora_request: Optional[LoRARequest] = None,
prompt_adapter_request: Optional[PromptAdapterRequest] = None,
priority: int = 0,
) -> None:
request_id = str(next(self.request_counter))
self.llm_engine.add_request(
@@ -827,6 +834,7 @@ class LLM:
params,
lora_request=lora_request,
prompt_adapter_request=prompt_adapter_request,
priority=priority,
)
def _add_guided_processor(