Convert formatting to use ruff instead of yapf + isort (#26247)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Harry Mellor
2025-10-05 15:06:22 +01:00
committed by GitHub
parent 17edd8a807
commit d6953beb91
1508 changed files with 115244 additions and 94146 deletions

View File

@@ -8,8 +8,7 @@ import torch
from tqdm import tqdm
from vllm.config import KVTransferConfig
from vllm.distributed.kv_transfer.kv_lookup_buffer.simple_buffer import (
SimpleBuffer)
from vllm.distributed.kv_transfer.kv_lookup_buffer.simple_buffer import SimpleBuffer
from vllm.distributed.kv_transfer.kv_pipe.pynccl_pipe import PyNcclPipe
# TODO: the test depends on a lot of fields in the current implementation.
@@ -17,7 +16,6 @@ from vllm.distributed.kv_transfer.kv_pipe.pynccl_pipe import PyNcclPipe
def test_run(my_rank, buffer, device):
# buffer should be empty in the beginning
if my_rank == 0:
assert buffer.buffer_size == 0
@@ -27,7 +25,7 @@ def test_run(my_rank, buffer, device):
# insert
tokens = torch.tensor([1, 2, 3]).to(device)
roi = (tokens > 0)
roi = tokens > 0
if my_rank == 0:
key = 2.0 * torch.ones([5, 6]).to(device)
value = 3.0 * torch.ones([5, 6]).to(device)
@@ -55,7 +53,6 @@ def test_run(my_rank, buffer, device):
def stress_test(my_rank, buf, device):
torch.distributed.barrier()
torch.manual_seed(100)
@@ -66,7 +63,8 @@ def stress_test(my_rank, buf, device):
torch.rand(100).to(device), # key
torch.rand(100).to(device), # value
torch.rand(100).to(device), # hidden
) for i in tqdm(range(200))
)
for i in tqdm(range(200))
]
random.seed(my_rank)
@@ -115,12 +113,11 @@ def stress_test(my_rank, buf, device):
if __name__ == "__main__":
my_rank = int(os.environ['RANK'])
my_rank = int(os.environ["RANK"])
torch.distributed.init_process_group(
backend='gloo',
init_method='tcp://localhost:12398',
backend="gloo",
init_method="tcp://localhost:12398",
world_size=2,
rank=my_rank,
)
@@ -128,8 +125,8 @@ if __name__ == "__main__":
print(f"initialized! My rank is {my_rank}")
config = KVTransferConfig(
kv_connector='P2pNcclConnector',
kv_buffer_device='cuda',
kv_connector="P2pNcclConnector",
kv_buffer_device="cuda",
kv_buffer_size=1e9,
kv_rank=my_rank,
kv_role="kv_both", # this arg doesn't matter in this test
@@ -160,4 +157,4 @@ if __name__ == "__main__":
buffer.close()
data_pipe.close()
cpu_pipe.close()
print('Done')
print("Done")

View File

@@ -9,21 +9,19 @@ import torch
def run_python_script(script_name, timeout):
script_name = f'kv_transfer/{script_name}'
script_name = f"kv_transfer/{script_name}"
try:
# Start both processes asynchronously using Popen
process0 = subprocess.Popen(
[sys.executable, script_name],
env={"RANK":
"0"}, # Set the RANK environment variable for process 0
env={"RANK": "0"}, # Set the RANK environment variable for process 0
stdout=sys.stdout, # Pipe stdout to current stdout
stderr=sys.stderr, # Pipe stderr to current stderr
)
process1 = subprocess.Popen(
[sys.executable, script_name],
env={"RANK":
"1"}, # Set the RANK environment variable for process 1
env={"RANK": "1"}, # Set the RANK environment variable for process 1
stdout=sys.stdout, # Pipe stdout to current stdout
stderr=sys.stderr, # Pipe stderr to current stderr
)
@@ -34,11 +32,9 @@ def run_python_script(script_name, timeout):
# Check the return status of both processes
if process0.returncode != 0:
pytest.fail(
f"Test {script_name} failed for RANK=0, {process0.returncode}")
pytest.fail(f"Test {script_name} failed for RANK=0, {process0.returncode}")
if process1.returncode != 0:
pytest.fail(
f"Test {script_name} failed for RANK=1, {process1.returncode}")
pytest.fail(f"Test {script_name} failed for RANK=1, {process1.returncode}")
except subprocess.TimeoutExpired:
# If either process times out, terminate both and fail the test
@@ -53,15 +49,14 @@ def run_python_script(script_name, timeout):
@pytest.mark.parametrize(
"script_name,timeout",
[
("test_lookup_buffer.py",
60), # Second test case with a 60-second timeout
("test_send_recv.py", 120) # First test case with a 120-second timeout
])
("test_lookup_buffer.py", 60), # Second test case with a 60-second timeout
("test_send_recv.py", 120), # First test case with a 120-second timeout
],
)
def test_run_python_script(script_name, timeout):
# Check the number of GPUs
if torch.cuda.device_count() < 2:
pytest.skip(
f"Skipping test {script_name} because <2 GPUs are available")
pytest.skip(f"Skipping test {script_name} because <2 GPUs are available")
# Run the test if there are at least 2 GPUs
run_python_script(script_name, timeout)

View File

@@ -15,7 +15,7 @@ def test_run(my_rank, pipe):
print(f"rank {my_rank} test_run starts....")
# test run
x = torch.tensor([1]).to(pipe.device)
y = torch.tensor([[2., 3., 4., 8.]]).to(pipe.device)
y = torch.tensor([[2.0, 3.0, 4.0, 8.0]]).to(pipe.device)
if my_rank == 0:
pipe.send_tensor(x)
print(f"rank {my_rank} sent tensor x")
@@ -53,9 +53,8 @@ def stress_test(my_rank, pipe):
for i in tqdm(range(500)):
mean = torch.rand(1).item() * 100
std = torch.rand(1).item() * 100
size = torch.randint(900, 1000, (2, ))
x = torch.normal(mean * 1.0, std * 1.0,
size=size.tolist()).to(pipe.device)
size = torch.randint(900, 1000, (2,))
x = torch.normal(mean * 1.0, std * 1.0, size=size.tolist()).to(pipe.device)
# 5% probability of sending a None
if torch.rand(1).item() < 0.05:
@@ -96,20 +95,16 @@ def latency_test(my_rank, pipe, nelement, ntensor):
torch.distributed.barrier()
for i in tqdm(range(500)):
tensors = []
if my_rank == 0:
# create tensor
tensors = [
torch.rand(nelement).to(pipe.device) for _ in range(ntensor)
]
tensors = [torch.rand(nelement).to(pipe.device) for _ in range(ntensor)]
torch.distributed.barrier()
if my_rank == 0:
t = torch.tensor([time.time()],
dtype=torch.float64).to(pipe.device)
t = torch.tensor([time.time()], dtype=torch.float64).to(pipe.device)
for tensor in tensors:
pipe.send_tensor(tensor)
pipe.send_tensor(t)
@@ -121,24 +116,23 @@ def latency_test(my_rank, pipe, nelement, ntensor):
torch.distributed.barrier()
print('Latency test passed.')
print('Latency:', torch.tensor(latencies).mean().item() * 1000, 'ms')
print("Latency test passed.")
print("Latency:", torch.tensor(latencies).mean().item() * 1000, "ms")
if __name__ == "__main__":
my_rank = int(os.environ['RANK'])
my_rank = int(os.environ["RANK"])
torch.distributed.init_process_group(
backend='gloo',
init_method='tcp://localhost:12398',
backend="gloo",
init_method="tcp://localhost:12398",
world_size=2,
rank=my_rank,
)
config = KVTransferConfig(
kv_connector='P2pNcclConnector',
kv_buffer_device='cuda',
kv_connector="P2pNcclConnector",
kv_buffer_device="cuda",
kv_buffer_size=1e9,
kv_rank=my_rank,
kv_role="kv_both", # this arg doesn't matter in this test