[core][distributed] zmq fallback for broadcasting large objects (#6183)
[core][distributed] add zmq fallback for broadcasting large objects (#6183)
This commit is contained in:
@@ -6,8 +6,7 @@ from typing import List
|
||||
import numpy as np
|
||||
import torch.distributed as dist
|
||||
|
||||
from vllm.distributed.device_communicators.shm_broadcast import (
|
||||
ShmRingBuffer, ShmRingBufferIO)
|
||||
from vllm.distributed.device_communicators.shm_broadcast import MessageQueue
|
||||
from vllm.utils import update_environment_variables
|
||||
|
||||
|
||||
@@ -56,8 +55,8 @@ def worker_fn_wrapper(fn):
|
||||
@worker_fn_wrapper
|
||||
def worker_fn():
|
||||
writer_rank = 2
|
||||
broadcaster = ShmRingBufferIO.create_from_process_group(
|
||||
dist.group.WORLD, 1024 * 1024, 2, writer_rank)
|
||||
broadcaster = MessageQueue.create_from_process_group(
|
||||
dist.group.WORLD, 40 * 1024, 2, writer_rank)
|
||||
if dist.get_rank() == writer_rank:
|
||||
seed = random.randint(0, 1000)
|
||||
dist.broadcast_object_list([seed], writer_rank)
|
||||
@@ -87,13 +86,3 @@ def worker_fn():
|
||||
|
||||
def test_shm_broadcast():
|
||||
distributed_run(worker_fn, 4)
|
||||
|
||||
|
||||
def test_singe_process():
|
||||
buffer = ShmRingBuffer(1, 1024, 4)
|
||||
reader = ShmRingBufferIO(buffer, reader_rank=0)
|
||||
writer = ShmRingBufferIO(buffer, reader_rank=-1)
|
||||
writer.enqueue([0])
|
||||
writer.enqueue([1])
|
||||
assert reader.dequeue() == [0]
|
||||
assert reader.dequeue() == [1]
|
||||
|
||||
Reference in New Issue
Block a user