Convert formatting to use ruff instead of yapf + isort (#26247)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -3,11 +3,13 @@
|
||||
"""
|
||||
Script to test add_lora, remove_lora, pin_lora, list_loras functions.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
|
||||
from vllm.entrypoints.openai.api_server import (
|
||||
build_async_engine_client_from_engine_args)
|
||||
build_async_engine_client_from_engine_args,
|
||||
)
|
||||
from vllm.lora.request import LoRARequest
|
||||
from vllm.v1.engine.llm_engine import LLMEngine
|
||||
|
||||
@@ -17,23 +19,24 @@ LORA_RANK = 8
|
||||
|
||||
|
||||
def make_lora_request(lora_id: int):
|
||||
return LoRARequest(lora_name=f"{lora_id}",
|
||||
lora_int_id=lora_id,
|
||||
lora_path=LORA_MODULE_PATH)
|
||||
return LoRARequest(
|
||||
lora_name=f"{lora_id}", lora_int_id=lora_id, lora_path=LORA_MODULE_PATH
|
||||
)
|
||||
|
||||
|
||||
def test_lora_functions_sync():
|
||||
|
||||
max_loras = 4
|
||||
# Create engine in eager-mode. Due to high max_loras, the CI can
|
||||
# OOM during cuda-graph capture.
|
||||
engine_args = EngineArgs(model=MODEL_PATH,
|
||||
enable_lora=True,
|
||||
max_loras=max_loras,
|
||||
max_lora_rank=LORA_RANK,
|
||||
max_model_len=128,
|
||||
gpu_memory_utilization=0.8,
|
||||
enforce_eager=True)
|
||||
engine_args = EngineArgs(
|
||||
model=MODEL_PATH,
|
||||
enable_lora=True,
|
||||
max_loras=max_loras,
|
||||
max_lora_rank=LORA_RANK,
|
||||
max_model_len=128,
|
||||
gpu_memory_utilization=0.8,
|
||||
enforce_eager=True,
|
||||
)
|
||||
|
||||
llm = LLMEngine.from_engine_args(engine_args)
|
||||
|
||||
@@ -70,15 +73,16 @@ def test_lora_functions_sync():
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_lora_functions_async():
|
||||
|
||||
max_loras = 4
|
||||
engine_args = AsyncEngineArgs(model=MODEL_PATH,
|
||||
enable_lora=True,
|
||||
max_loras=max_loras,
|
||||
max_lora_rank=LORA_RANK,
|
||||
max_model_len=128,
|
||||
gpu_memory_utilization=0.8,
|
||||
enforce_eager=True)
|
||||
engine_args = AsyncEngineArgs(
|
||||
model=MODEL_PATH,
|
||||
enable_lora=True,
|
||||
max_loras=max_loras,
|
||||
max_lora_rank=LORA_RANK,
|
||||
max_model_len=128,
|
||||
gpu_memory_utilization=0.8,
|
||||
enforce_eager=True,
|
||||
)
|
||||
|
||||
async def run_check(fn, args, expected: list):
|
||||
await fn(args)
|
||||
|
||||
Reference in New Issue
Block a user