Enable the async scheduling + preemption + chunked prefill test configurations.

* tests/v1/e2e/test_async_scheduling.py: un-comment the two configurations in
  each test that were disabled with the note "needs to be fixed (WIP)".
* vllm/v1/core/sched/scheduler.py: for requests not scheduled in the previous
  step, store a full copy of req.all_token_ids instead of the slice
  [: req.num_computed_tokens + num_tokens].
* vllm/v1/utils.py: add ConstantList.copy(), which returns a copy of the
  wrapped list (presumably the type of req.all_token_ids; confirm).

NOTE(review): leading indentation inside the hunks was reconstructed from the
Python nesting; verify against the target tree before applying.

diff --git a/tests/v1/e2e/test_async_scheduling.py b/tests/v1/e2e/test_async_scheduling.py
index dbe403ece..c4aca8241 100644
--- a/tests/v1/e2e/test_async_scheduling.py
+++ b/tests/v1/e2e/test_async_scheduling.py
@@ -65,9 +65,8 @@ def test_without_spec_decoding(
         (True, "mp", True, None, False),
         (True, "uni", True, None, False),
         (False, "mp", True, None, True),
-        # Async scheduling + preemption + chunked prefill needs to be fixed (WIP)
-        # (True, "mp", True, None, True),
-        # (True, "uni", True, None, True),
+        (True, "mp", True, None, True),
+        (True, "uni", True, None, True),
     ]
 
     run_tests(
@@ -103,9 +102,8 @@ def test_with_spec_decoding(monkeypatch: pytest.MonkeyPatch):
         (False, "mp", True, spec_config_short, True),
         (True, "uni", True, spec_config, False),
         (True, "uni", True, spec_config_short, False),
-        # Async scheduling + preemption + chunked prefill needs to be fixed (WIP)
-        # (True, "mp", True, spec_config, True),
-        # (True, "uni", True, spec_config_short, True),
+        (True, "mp", True, spec_config, True),
+        (True, "uni", True, spec_config_short, True),
     ]
 
     run_tests(
diff --git a/vllm/v1/core/sched/scheduler.py b/vllm/v1/core/sched/scheduler.py
index bc15979de..8e6254233 100644
--- a/vllm/v1/core/sched/scheduler.py
+++ b/vllm/v1/core/sched/scheduler.py
@@ -778,9 +778,7 @@ class Scheduler(SchedulerInterface):
                 assert not scheduled_in_prev_step
                 resumed_req_ids.add(req_id)
             if not scheduled_in_prev_step:
-                all_token_ids[req_id] = req.all_token_ids[
-                    : req.num_computed_tokens + num_tokens
-                ]
+                all_token_ids[req_id] = req.all_token_ids.copy()
             new_block_ids.append(
                 req_to_new_blocks[req_id].get_block_ids(allow_none=True)
             )
diff --git a/vllm/v1/utils.py b/vllm/v1/utils.py
index a401f6d74..29099d1e9 100644
--- a/vllm/v1/utils.py
+++ b/vllm/v1/utils.py
@@ -97,6 +97,9 @@ class ConstantList(Generic[T], Sequence):
     def __repr__(self):
         return f"ConstantList({self._x})"
 
+    def copy(self) -> list[T]:
+        return self._x.copy()
+
 
 class CpuGpuBuffer:
     """Buffer to easily copy tensors between CPU and GPU."""