diff --git a/tests/test_sharded_state_loader.py b/tests/test_sharded_state_loader.py index 1bb4203d2..42afdfa3c 100644 --- a/tests/test_sharded_state_loader.py +++ b/tests/test_sharded_state_loader.py @@ -118,8 +118,17 @@ def test_sharded_state_loader(enable_lora, tp_size, num_gpus_available, tensor_parallel_size=tp_size, )) p.start() - p.join() + # Call queue.get() before p.join() to prevent deadlock: + # If p.join() is called before queue.get() and the queue is full, + # the child process may block while writing to the queue and never + # terminate, causing the parent to wait indefinitely on p.join(). + # See: https://github.com/vllm-project/vllm/pull/22371#discussion_r2257773814 out_before = queue.get() + p.join() + queue.close() + queue.join_thread() + + queue = ctx.Queue() p = ctx.Process(target=_run_generate, args=(output_dir, queue), @@ -131,7 +140,14 @@ def test_sharded_state_loader(enable_lora, tp_size, num_gpus_available, load_format="sharded_state", )) p.start() - p.join() + # Call queue.get() before p.join() to prevent deadlock: + # If p.join() is called before queue.get() and the queue is full, + # the child process may block while writing to the queue and never + # terminate, causing the parent to wait indefinitely on p.join(). + # See: https://github.com/vllm-project/vllm/pull/22371#discussion_r2257773814 out_after = queue.get() + p.join() + queue.close() + queue.join_thread() assert out_before == out_after