[Core] Rework handling of async scheduling config (#28250)

Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
Nick Hill
2025-11-07 12:01:23 -08:00
committed by GitHub
parent 18903216f5
commit da786e339e
6 changed files with 121 additions and 71 deletions

View File

@@ -66,7 +66,7 @@ def test_engine_core():
assert len(engine_core.scheduler.waiting) == 1
assert len(engine_core.scheduler.running) == 0
_ = engine_core.step()
_ = engine_core.step_fn()
assert len(engine_core.scheduler.waiting) == 0
assert len(engine_core.scheduler.running) == 1
@@ -75,7 +75,7 @@ def test_engine_core():
assert len(engine_core.scheduler.waiting) == 1
assert len(engine_core.scheduler.running) == 1
_ = engine_core.step()
_ = engine_core.step_fn()
assert len(engine_core.scheduler.waiting) == 0
assert len(engine_core.scheduler.running) == 2
@@ -85,12 +85,12 @@ def test_engine_core():
assert len(engine_core.scheduler.waiting) == 2
assert len(engine_core.scheduler.running) == 2
_ = engine_core.step()
_ = engine_core.step_fn()
assert len(engine_core.scheduler.waiting) == 0
assert len(engine_core.scheduler.running) == 4
# Loop through until they are all done.
while (outs := engine_core.step()[0].get(0)) and outs.outputs:
while (outs := engine_core.step_fn()[0].get(0)) and outs.outputs:
pass
assert len(engine_core.scheduler.waiting) == 0
@@ -107,7 +107,7 @@ def test_engine_core():
assert engine_core.scheduler.has_unfinished_requests()
assert not engine_core.scheduler.has_finished_requests()
_ = engine_core.step()
_ = engine_core.step_fn()
assert len(engine_core.scheduler.waiting) == 0
assert len(engine_core.scheduler.running) == 1
assert engine_core.scheduler.has_unfinished_requests()
@@ -119,7 +119,7 @@ def test_engine_core():
assert not engine_core.scheduler.has_unfinished_requests()
assert engine_core.scheduler.has_finished_requests()
_ = engine_core.step()
_ = engine_core.step_fn()
assert not engine_core.scheduler.has_unfinished_requests()
assert not engine_core.scheduler.has_finished_requests()
@@ -133,7 +133,7 @@ def test_engine_core():
assert len(engine_core.scheduler.waiting) == 2
assert len(engine_core.scheduler.running) == 0
_ = engine_core.step()
_ = engine_core.step_fn()
assert len(engine_core.scheduler.waiting) == 0
assert len(engine_core.scheduler.running) == 2
@@ -141,7 +141,7 @@ def test_engine_core():
assert len(engine_core.scheduler.waiting) == 1
assert len(engine_core.scheduler.running) == 2
_ = engine_core.step()
_ = engine_core.step_fn()
assert len(engine_core.scheduler.waiting) == 0
assert len(engine_core.scheduler.running) == 3
@@ -150,7 +150,7 @@ def test_engine_core():
assert len(engine_core.scheduler.waiting) == 0
assert len(engine_core.scheduler.running) == 2
_ = engine_core.step()
_ = engine_core.step_fn()
assert len(engine_core.scheduler.waiting) == 0
assert len(engine_core.scheduler.running) == 2
@@ -165,12 +165,12 @@ def test_engine_core():
req0.request_id = req1.request_id = "test"
engine_core.add_request(*engine_core.preprocess_add_request(req0))
while (outs := engine_core.step()[0].get(0)) and outs.outputs:
pass
while engine_core.scheduler.has_requests():
engine_core.step_fn()
engine_core.add_request(*engine_core.preprocess_add_request(req1))
while (outs := engine_core.step()[0].get(0)) and outs.outputs:
pass
while engine_core.scheduler.has_requests():
engine_core.step_fn()
assert len(engine_core.scheduler.waiting) == 0
assert len(engine_core.scheduler.running) == 0
@@ -208,8 +208,8 @@ def test_engine_core_advanced_sampling():
assert len(engine_core.scheduler.waiting) == 1
assert len(engine_core.scheduler.running) == 0
# Loop through until they are all done.
while (outs := engine_core.step()[0].get(0)) and outs.outputs:
pass
while engine_core.scheduler.has_requests():
engine_core.step_fn()
assert len(engine_core.scheduler.waiting) == 0
assert len(engine_core.scheduler.running) == 0
@@ -297,6 +297,8 @@ def test_engine_core_concurrent_batches():
max_num_batched_tokens=10,
# Reduce startup time.
enforce_eager=True,
# Test concurrent batch behaviour independently of async scheduling.
async_scheduling=False,
)
vllm_config = engine_args.create_engine_config()
with set_default_torch_num_threads(1):