[Core] feat: Implement Priority Scheduling in V1 Engine (#19057)

Signed-off-by: amit <amit.man@gmail.com>
Co-authored-by: Roger Wang <Rogerw0108@gmail.com>
This commit is contained in:
amit
2025-06-23 06:18:08 +03:00
committed by GitHub
parent c4cf260677
commit 4a0f7888a3
7 changed files with 896 additions and 30 deletions

View File

@@ -219,8 +219,6 @@ class Processor:
# TODO(woosuk): Support encoder-decoder models.
self._validate_lora(lora_request)
self._validate_params(params, lora_request)
if priority != 0:
raise ValueError("V1 does not support priority yet.")
if trace_headers is not None:
raise ValueError("V1 does not support tracing yet.")
if prompt_adapter_request is not None:
@@ -340,6 +338,7 @@ class Processor:
arrival_time=arrival_time,
lora_request=lora_request,
cache_salt=decoder_inputs.get("cache_salt"),
priority=priority,
data_parallel_rank=data_parallel_rank,
)