[Core] Adding Priority Scheduling (#5958)

This commit is contained in:
Archit Patke
2024-09-24 21:50:50 -05:00
committed by GitHub
parent 01b6f9e1f0
commit 6da1ab6b41
6 changed files with 410 additions and 8 deletions

View File

@@ -646,6 +646,7 @@ class SequenceGroup:
unless you are working with an encoder/decoder model.
trace_headers: OpenTelemetry trace headers.
prompt_adapter_request: Prompt Adapter request.
priority: User-defined priority of the request (defaults to 0).
"""
def __init__(
@@ -660,9 +661,11 @@ class SequenceGroup:
encoder_seq: Optional[Sequence] = None,
trace_headers: Optional[Mapping[str, str]] = None,
prompt_adapter_request: Optional[PromptAdapterRequest] = None,
priority: int = 0,
) -> None:
self.request_id = request_id
self.seqs = seqs
self.arrival_time = arrival_time
self.is_single_seq = len(seqs) == 1
self.seqs_dict = {seq.seq_id: seq for seq in seqs}
@@ -680,6 +683,7 @@ class SequenceGroup:
self.prompt_adapter_request = prompt_adapter_request
self.encoder_seq = encoder_seq
self.trace_headers = trace_headers
self.priority = priority
self.cached_request_output = None