Convert formatting to use ruff instead of yapf + isort (#26247)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -20,26 +20,18 @@ MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
|
||||
BADREQUEST_CASES = [
|
||||
(
|
||||
"test_rank",
|
||||
{
|
||||
"r": 1024
|
||||
},
|
||||
{"r": 1024},
|
||||
"is greater than max_lora_rank",
|
||||
),
|
||||
(
|
||||
"test_bias",
|
||||
{
|
||||
"bias": "all"
|
||||
},
|
||||
{"bias": "all"},
|
||||
"Adapter bias cannot be used without bias_enabled",
|
||||
),
|
||||
("test_dora", {
|
||||
"use_dora": True
|
||||
}, "does not yet support DoRA"),
|
||||
("test_dora", {"use_dora": True}, "does not yet support DoRA"),
|
||||
(
|
||||
"test_modules_to_save",
|
||||
{
|
||||
"modules_to_save": ["lm_head"]
|
||||
},
|
||||
{"modules_to_save": ["lm_head"]},
|
||||
"only supports modules_to_save being None",
|
||||
),
|
||||
]
|
||||
@@ -48,24 +40,23 @@ BADREQUEST_CASES = [
|
||||
@pytest.fixture(scope="module")
|
||||
def monkeypatch_module():
|
||||
from _pytest.monkeypatch import MonkeyPatch
|
||||
|
||||
mpatch = MonkeyPatch()
|
||||
yield mpatch
|
||||
mpatch.undo()
|
||||
|
||||
|
||||
@pytest.fixture(scope="module", params=[True])
|
||||
def server_with_lora_modules_json(request, monkeypatch_module,
|
||||
zephyr_lora_files):
|
||||
|
||||
def server_with_lora_modules_json(request, monkeypatch_module, zephyr_lora_files):
|
||||
use_v1 = request.param
|
||||
assert use_v1
|
||||
monkeypatch_module.setenv('VLLM_USE_V1', '1')
|
||||
monkeypatch_module.setenv("VLLM_USE_V1", "1")
|
||||
|
||||
# Define the json format LoRA module configurations
|
||||
lora_module_1 = {
|
||||
"name": "zephyr-lora",
|
||||
"path": zephyr_lora_files,
|
||||
"base_model_name": MODEL_NAME
|
||||
"base_model_name": MODEL_NAME,
|
||||
}
|
||||
|
||||
args = [
|
||||
@@ -96,14 +87,12 @@ def server_with_lora_modules_json(request, monkeypatch_module,
|
||||
|
||||
@pytest_asyncio.fixture
|
||||
async def client(server_with_lora_modules_json):
|
||||
async with server_with_lora_modules_json.get_async_client(
|
||||
) as async_client:
|
||||
async with server_with_lora_modules_json.get_async_client() as async_client:
|
||||
yield async_client
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_static_lora_lineage(client: openai.AsyncOpenAI,
|
||||
zephyr_lora_files):
|
||||
async def test_static_lora_lineage(client: openai.AsyncOpenAI, zephyr_lora_files):
|
||||
models = await client.models.list()
|
||||
models = models.data
|
||||
served_model = models[0]
|
||||
@@ -111,22 +100,18 @@ async def test_static_lora_lineage(client: openai.AsyncOpenAI,
|
||||
assert served_model.id == MODEL_NAME
|
||||
assert served_model.root == MODEL_NAME
|
||||
assert served_model.parent is None
|
||||
assert all(lora_model.root == zephyr_lora_files
|
||||
for lora_model in lora_models)
|
||||
assert all(lora_model.root == zephyr_lora_files for lora_model in lora_models)
|
||||
assert all(lora_model.parent == MODEL_NAME for lora_model in lora_models)
|
||||
assert lora_models[0].id == "zephyr-lora"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_dynamic_lora_lineage(client: openai.AsyncOpenAI,
|
||||
zephyr_lora_files):
|
||||
|
||||
response = await client.post("load_lora_adapter",
|
||||
cast_to=str,
|
||||
body={
|
||||
"lora_name": "zephyr-lora-3",
|
||||
"lora_path": zephyr_lora_files
|
||||
})
|
||||
async def test_dynamic_lora_lineage(client: openai.AsyncOpenAI, zephyr_lora_files):
|
||||
response = await client.post(
|
||||
"load_lora_adapter",
|
||||
cast_to=str,
|
||||
body={"lora_name": "zephyr-lora-3", "lora_path": zephyr_lora_files},
|
||||
)
|
||||
# Ensure adapter loads before querying /models
|
||||
assert "success" in response
|
||||
|
||||
@@ -141,37 +126,37 @@ async def test_dynamic_lora_lineage(client: openai.AsyncOpenAI,
|
||||
@pytest.mark.asyncio
|
||||
async def test_dynamic_lora_not_found(client: openai.AsyncOpenAI):
|
||||
with pytest.raises(openai.NotFoundError):
|
||||
await client.post("load_lora_adapter",
|
||||
cast_to=str,
|
||||
body={
|
||||
"lora_name": "notfound",
|
||||
"lora_path": "/not/an/adapter"
|
||||
})
|
||||
await client.post(
|
||||
"load_lora_adapter",
|
||||
cast_to=str,
|
||||
body={"lora_name": "notfound", "lora_path": "/not/an/adapter"},
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_dynamic_lora_invalid_files(client: openai.AsyncOpenAI,
|
||||
tmp_path):
|
||||
async def test_dynamic_lora_invalid_files(client: openai.AsyncOpenAI, tmp_path):
|
||||
invalid_files = tmp_path / "invalid_files"
|
||||
invalid_files.mkdir()
|
||||
(invalid_files / "adapter_config.json").write_text("this is not json")
|
||||
|
||||
with pytest.raises(openai.BadRequestError):
|
||||
await client.post("load_lora_adapter",
|
||||
cast_to=str,
|
||||
body={
|
||||
"lora_name": "invalid-json",
|
||||
"lora_path": str(invalid_files)
|
||||
})
|
||||
await client.post(
|
||||
"load_lora_adapter",
|
||||
cast_to=str,
|
||||
body={"lora_name": "invalid-json", "lora_path": str(invalid_files)},
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize("test_name,config_change,expected_error",
|
||||
BADREQUEST_CASES)
|
||||
async def test_dynamic_lora_badrequests(client: openai.AsyncOpenAI, tmp_path,
|
||||
zephyr_lora_files, test_name: str,
|
||||
config_change: dict,
|
||||
expected_error: str):
|
||||
@pytest.mark.parametrize("test_name,config_change,expected_error", BADREQUEST_CASES)
|
||||
async def test_dynamic_lora_badrequests(
|
||||
client: openai.AsyncOpenAI,
|
||||
tmp_path,
|
||||
zephyr_lora_files,
|
||||
test_name: str,
|
||||
config_change: dict,
|
||||
expected_error: str,
|
||||
):
|
||||
# Create test directory
|
||||
test_dir = tmp_path / test_name
|
||||
|
||||
@@ -191,29 +176,28 @@ async def test_dynamic_lora_badrequests(client: openai.AsyncOpenAI, tmp_path,
|
||||
|
||||
# Test loading the adapter
|
||||
with pytest.raises(openai.BadRequestError, match=expected_error):
|
||||
await client.post("load_lora_adapter",
|
||||
cast_to=str,
|
||||
body={
|
||||
"lora_name": test_name,
|
||||
"lora_path": str(test_dir)
|
||||
})
|
||||
await client.post(
|
||||
"load_lora_adapter",
|
||||
cast_to=str,
|
||||
body={"lora_name": test_name, "lora_path": str(test_dir)},
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_multiple_lora_adapters(client: openai.AsyncOpenAI, tmp_path,
|
||||
zephyr_lora_files):
|
||||
async def test_multiple_lora_adapters(
|
||||
client: openai.AsyncOpenAI, tmp_path, zephyr_lora_files
|
||||
):
|
||||
"""Validate that many loras can be dynamically registered and inferenced
|
||||
with concurrently"""
|
||||
|
||||
# This test file configures the server with --max-cpu-loras=2 and this test
|
||||
# will concurrently load 10 adapters, so it should flex the LRU cache
|
||||
async def load_and_run_adapter(adapter_name: str):
|
||||
await client.post("load_lora_adapter",
|
||||
cast_to=str,
|
||||
body={
|
||||
"lora_name": adapter_name,
|
||||
"lora_path": str(zephyr_lora_files)
|
||||
})
|
||||
await client.post(
|
||||
"load_lora_adapter",
|
||||
cast_to=str,
|
||||
body={"lora_name": adapter_name, "lora_path": str(zephyr_lora_files)},
|
||||
)
|
||||
for _ in range(3):
|
||||
await client.completions.create(
|
||||
model=adapter_name,
|
||||
@@ -223,8 +207,7 @@ async def test_multiple_lora_adapters(client: openai.AsyncOpenAI, tmp_path,
|
||||
|
||||
lora_tasks = []
|
||||
for i in range(10):
|
||||
lora_tasks.append(
|
||||
asyncio.create_task(load_and_run_adapter(f"adapter_{i}")))
|
||||
lora_tasks.append(asyncio.create_task(load_and_run_adapter(f"adapter_{i}")))
|
||||
|
||||
results, _ = await asyncio.wait(lora_tasks)
|
||||
|
||||
@@ -234,8 +217,8 @@ async def test_multiple_lora_adapters(client: openai.AsyncOpenAI, tmp_path,
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_loading_invalid_adapters_does_not_break_others(
|
||||
client: openai.AsyncOpenAI, tmp_path, zephyr_lora_files):
|
||||
|
||||
client: openai.AsyncOpenAI, tmp_path, zephyr_lora_files
|
||||
):
|
||||
invalid_files = tmp_path / "invalid_files"
|
||||
invalid_files.mkdir()
|
||||
(invalid_files / "adapter_config.json").write_text("this is not json")
|
||||
@@ -266,20 +249,18 @@ async def test_loading_invalid_adapters_does_not_break_others(
|
||||
# Run a bunch of bad adapter loads
|
||||
for _ in range(25):
|
||||
with suppress(openai.NotFoundError):
|
||||
await client.post("load_lora_adapter",
|
||||
cast_to=str,
|
||||
body={
|
||||
"lora_name": "notfound",
|
||||
"lora_path": "/not/an/adapter"
|
||||
})
|
||||
await client.post(
|
||||
"load_lora_adapter",
|
||||
cast_to=str,
|
||||
body={"lora_name": "notfound", "lora_path": "/not/an/adapter"},
|
||||
)
|
||||
for _ in range(25):
|
||||
with suppress(openai.BadRequestError):
|
||||
await client.post("load_lora_adapter",
|
||||
cast_to=str,
|
||||
body={
|
||||
"lora_name": "invalid",
|
||||
"lora_path": str(invalid_files)
|
||||
})
|
||||
await client.post(
|
||||
"load_lora_adapter",
|
||||
cast_to=str,
|
||||
body={"lora_name": "invalid", "lora_path": str(invalid_files)},
|
||||
)
|
||||
|
||||
# Ensure all the running requests with lora adapters succeeded
|
||||
stop_good_requests_event.set()
|
||||
@@ -288,12 +269,11 @@ async def test_loading_invalid_adapters_does_not_break_others(
|
||||
assert not isinstance(r, Exception), f"Got exception {r}"
|
||||
|
||||
# Ensure we can load another adapter and run it
|
||||
await client.post("load_lora_adapter",
|
||||
cast_to=str,
|
||||
body={
|
||||
"lora_name": "valid",
|
||||
"lora_path": zephyr_lora_files
|
||||
})
|
||||
await client.post(
|
||||
"load_lora_adapter",
|
||||
cast_to=str,
|
||||
body={"lora_name": "valid", "lora_path": zephyr_lora_files},
|
||||
)
|
||||
await client.completions.create(
|
||||
model="valid",
|
||||
prompt=["Hello there", "Foo bar bazz buzz"],
|
||||
@@ -310,12 +290,11 @@ async def test_beam_search_with_lora_adapters(
|
||||
"""Validate that async beam search can be used with lora."""
|
||||
|
||||
async def load_and_run_adapter(adapter_name: str):
|
||||
await client.post("load_lora_adapter",
|
||||
cast_to=str,
|
||||
body={
|
||||
"lora_name": adapter_name,
|
||||
"lora_path": str(zephyr_lora_files)
|
||||
})
|
||||
await client.post(
|
||||
"load_lora_adapter",
|
||||
cast_to=str,
|
||||
body={"lora_name": adapter_name, "lora_path": str(zephyr_lora_files)},
|
||||
)
|
||||
for _ in range(3):
|
||||
await client.completions.create(
|
||||
model=adapter_name,
|
||||
@@ -326,8 +305,7 @@ async def test_beam_search_with_lora_adapters(
|
||||
|
||||
lora_tasks = []
|
||||
for i in range(3):
|
||||
lora_tasks.append(
|
||||
asyncio.create_task(load_and_run_adapter(f"adapter_{i}")))
|
||||
lora_tasks.append(asyncio.create_task(load_and_run_adapter(f"adapter_{i}")))
|
||||
|
||||
results, _ = await asyncio.wait(lora_tasks)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user