Convert formatting to use ruff instead of yapf + isort (#26247)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -8,8 +8,7 @@ import torch
|
||||
from tqdm import tqdm
|
||||
|
||||
from vllm.config import KVTransferConfig
|
||||
from vllm.distributed.kv_transfer.kv_lookup_buffer.simple_buffer import (
|
||||
SimpleBuffer)
|
||||
from vllm.distributed.kv_transfer.kv_lookup_buffer.simple_buffer import SimpleBuffer
|
||||
from vllm.distributed.kv_transfer.kv_pipe.pynccl_pipe import PyNcclPipe
|
||||
|
||||
# TODO: the test depends on a lot of fields in the current implementation.
|
||||
@@ -17,7 +16,6 @@ from vllm.distributed.kv_transfer.kv_pipe.pynccl_pipe import PyNcclPipe
|
||||
|
||||
|
||||
def test_run(my_rank, buffer, device):
|
||||
|
||||
# buffer should be empty in the beginning
|
||||
if my_rank == 0:
|
||||
assert buffer.buffer_size == 0
|
||||
@@ -27,7 +25,7 @@ def test_run(my_rank, buffer, device):
|
||||
|
||||
# insert
|
||||
tokens = torch.tensor([1, 2, 3]).to(device)
|
||||
roi = (tokens > 0)
|
||||
roi = tokens > 0
|
||||
if my_rank == 0:
|
||||
key = 2.0 * torch.ones([5, 6]).to(device)
|
||||
value = 3.0 * torch.ones([5, 6]).to(device)
|
||||
@@ -55,7 +53,6 @@ def test_run(my_rank, buffer, device):
|
||||
|
||||
|
||||
def stress_test(my_rank, buf, device):
|
||||
|
||||
torch.distributed.barrier()
|
||||
torch.manual_seed(100)
|
||||
|
||||
@@ -66,7 +63,8 @@ def stress_test(my_rank, buf, device):
|
||||
torch.rand(100).to(device), # key
|
||||
torch.rand(100).to(device), # value
|
||||
torch.rand(100).to(device), # hidden
|
||||
) for i in tqdm(range(200))
|
||||
)
|
||||
for i in tqdm(range(200))
|
||||
]
|
||||
|
||||
random.seed(my_rank)
|
||||
@@ -115,12 +113,11 @@ def stress_test(my_rank, buf, device):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
my_rank = int(os.environ['RANK'])
|
||||
my_rank = int(os.environ["RANK"])
|
||||
|
||||
torch.distributed.init_process_group(
|
||||
backend='gloo',
|
||||
init_method='tcp://localhost:12398',
|
||||
backend="gloo",
|
||||
init_method="tcp://localhost:12398",
|
||||
world_size=2,
|
||||
rank=my_rank,
|
||||
)
|
||||
@@ -128,8 +125,8 @@ if __name__ == "__main__":
|
||||
print(f"initialized! My rank is {my_rank}")
|
||||
|
||||
config = KVTransferConfig(
|
||||
kv_connector='P2pNcclConnector',
|
||||
kv_buffer_device='cuda',
|
||||
kv_connector="P2pNcclConnector",
|
||||
kv_buffer_device="cuda",
|
||||
kv_buffer_size=1e9,
|
||||
kv_rank=my_rank,
|
||||
kv_role="kv_both", # this arg doesn't matter in this test
|
||||
@@ -160,4 +157,4 @@ if __name__ == "__main__":
|
||||
buffer.close()
|
||||
data_pipe.close()
|
||||
cpu_pipe.close()
|
||||
print('Done')
|
||||
print("Done")
|
||||
|
||||
@@ -9,21 +9,19 @@ import torch
|
||||
|
||||
|
||||
def run_python_script(script_name, timeout):
|
||||
script_name = f'kv_transfer/{script_name}'
|
||||
script_name = f"kv_transfer/{script_name}"
|
||||
try:
|
||||
# Start both processes asynchronously using Popen
|
||||
process0 = subprocess.Popen(
|
||||
[sys.executable, script_name],
|
||||
env={"RANK":
|
||||
"0"}, # Set the RANK environment variable for process 0
|
||||
env={"RANK": "0"}, # Set the RANK environment variable for process 0
|
||||
stdout=sys.stdout, # Pipe stdout to current stdout
|
||||
stderr=sys.stderr, # Pipe stderr to current stderr
|
||||
)
|
||||
|
||||
process1 = subprocess.Popen(
|
||||
[sys.executable, script_name],
|
||||
env={"RANK":
|
||||
"1"}, # Set the RANK environment variable for process 1
|
||||
env={"RANK": "1"}, # Set the RANK environment variable for process 1
|
||||
stdout=sys.stdout, # Pipe stdout to current stdout
|
||||
stderr=sys.stderr, # Pipe stderr to current stderr
|
||||
)
|
||||
@@ -34,11 +32,9 @@ def run_python_script(script_name, timeout):
|
||||
|
||||
# Check the return status of both processes
|
||||
if process0.returncode != 0:
|
||||
pytest.fail(
|
||||
f"Test {script_name} failed for RANK=0, {process0.returncode}")
|
||||
pytest.fail(f"Test {script_name} failed for RANK=0, {process0.returncode}")
|
||||
if process1.returncode != 0:
|
||||
pytest.fail(
|
||||
f"Test {script_name} failed for RANK=1, {process1.returncode}")
|
||||
pytest.fail(f"Test {script_name} failed for RANK=1, {process1.returncode}")
|
||||
|
||||
except subprocess.TimeoutExpired:
|
||||
# If either process times out, terminate both and fail the test
|
||||
@@ -53,15 +49,14 @@ def run_python_script(script_name, timeout):
|
||||
@pytest.mark.parametrize(
|
||||
"script_name,timeout",
|
||||
[
|
||||
("test_lookup_buffer.py",
|
||||
60), # Second test case with a 60-second timeout
|
||||
("test_send_recv.py", 120) # First test case with a 120-second timeout
|
||||
])
|
||||
("test_lookup_buffer.py", 60), # Second test case with a 60-second timeout
|
||||
("test_send_recv.py", 120), # First test case with a 120-second timeout
|
||||
],
|
||||
)
|
||||
def test_run_python_script(script_name, timeout):
|
||||
# Check the number of GPUs
|
||||
if torch.cuda.device_count() < 2:
|
||||
pytest.skip(
|
||||
f"Skipping test {script_name} because <2 GPUs are available")
|
||||
pytest.skip(f"Skipping test {script_name} because <2 GPUs are available")
|
||||
|
||||
# Run the test if there are at least 2 GPUs
|
||||
run_python_script(script_name, timeout)
|
||||
|
||||
@@ -15,7 +15,7 @@ def test_run(my_rank, pipe):
|
||||
print(f"rank {my_rank} test_run starts....")
|
||||
# test run
|
||||
x = torch.tensor([1]).to(pipe.device)
|
||||
y = torch.tensor([[2., 3., 4., 8.]]).to(pipe.device)
|
||||
y = torch.tensor([[2.0, 3.0, 4.0, 8.0]]).to(pipe.device)
|
||||
if my_rank == 0:
|
||||
pipe.send_tensor(x)
|
||||
print(f"rank {my_rank} sent tensor x")
|
||||
@@ -53,9 +53,8 @@ def stress_test(my_rank, pipe):
|
||||
for i in tqdm(range(500)):
|
||||
mean = torch.rand(1).item() * 100
|
||||
std = torch.rand(1).item() * 100
|
||||
size = torch.randint(900, 1000, (2, ))
|
||||
x = torch.normal(mean * 1.0, std * 1.0,
|
||||
size=size.tolist()).to(pipe.device)
|
||||
size = torch.randint(900, 1000, (2,))
|
||||
x = torch.normal(mean * 1.0, std * 1.0, size=size.tolist()).to(pipe.device)
|
||||
|
||||
# 5% probability of sending a None
|
||||
if torch.rand(1).item() < 0.05:
|
||||
@@ -96,20 +95,16 @@ def latency_test(my_rank, pipe, nelement, ntensor):
|
||||
torch.distributed.barrier()
|
||||
|
||||
for i in tqdm(range(500)):
|
||||
|
||||
tensors = []
|
||||
|
||||
if my_rank == 0:
|
||||
# create tensor
|
||||
tensors = [
|
||||
torch.rand(nelement).to(pipe.device) for _ in range(ntensor)
|
||||
]
|
||||
tensors = [torch.rand(nelement).to(pipe.device) for _ in range(ntensor)]
|
||||
|
||||
torch.distributed.barrier()
|
||||
|
||||
if my_rank == 0:
|
||||
t = torch.tensor([time.time()],
|
||||
dtype=torch.float64).to(pipe.device)
|
||||
t = torch.tensor([time.time()], dtype=torch.float64).to(pipe.device)
|
||||
for tensor in tensors:
|
||||
pipe.send_tensor(tensor)
|
||||
pipe.send_tensor(t)
|
||||
@@ -121,24 +116,23 @@ def latency_test(my_rank, pipe, nelement, ntensor):
|
||||
|
||||
torch.distributed.barrier()
|
||||
|
||||
print('Latency test passed.')
|
||||
print('Latency:', torch.tensor(latencies).mean().item() * 1000, 'ms')
|
||||
print("Latency test passed.")
|
||||
print("Latency:", torch.tensor(latencies).mean().item() * 1000, "ms")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
my_rank = int(os.environ['RANK'])
|
||||
my_rank = int(os.environ["RANK"])
|
||||
|
||||
torch.distributed.init_process_group(
|
||||
backend='gloo',
|
||||
init_method='tcp://localhost:12398',
|
||||
backend="gloo",
|
||||
init_method="tcp://localhost:12398",
|
||||
world_size=2,
|
||||
rank=my_rank,
|
||||
)
|
||||
|
||||
config = KVTransferConfig(
|
||||
kv_connector='P2pNcclConnector',
|
||||
kv_buffer_device='cuda',
|
||||
kv_connector="P2pNcclConnector",
|
||||
kv_buffer_device="cuda",
|
||||
kv_buffer_size=1e9,
|
||||
kv_rank=my_rank,
|
||||
kv_role="kv_both", # this arg doesn't matter in this test
|
||||
|
||||
Reference in New Issue
Block a user