[MISC] Consolidate cleanup() and refactor offline_inference_with_prefix.py (#9510)
This commit is contained in:
@@ -6,11 +6,10 @@ from typing import List
|
||||
import pytest
|
||||
|
||||
import vllm
|
||||
+from vllm.distributed import cleanup_dist_env_and_memory
|
||||
from vllm.lora.request import LoRARequest
|
||||
from vllm.utils import is_hip
|
||||
|
||||
-from .conftest import cleanup
|
||||
|
||||
|
||||
@dataclass
|
||||
class ModelWithQuantization:
|
||||
@@ -160,7 +159,7 @@ def test_quant_model_lora(tinyllama_lora_files, num_gpus_available, model,
|
||||
print("removing lora")
|
||||
|
||||
del llm
|
||||
-cleanup()
|
||||
+cleanup_dist_env_and_memory()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model", MODELS)
|
||||
@@ -181,7 +180,7 @@ def test_quant_model_tp_equality(tinyllama_lora_files, num_gpus_available,
|
||||
output_tp1 = do_sample(llm_tp1, tinyllama_lora_files, lora_id=1)
|
||||
|
||||
del llm_tp1
|
||||
-cleanup()
|
||||
+cleanup_dist_env_and_memory()
|
||||
|
||||
llm_tp2 = vllm.LLM(
|
||||
model=model.model_path,
|
||||
@@ -194,6 +193,6 @@ def test_quant_model_tp_equality(tinyllama_lora_files, num_gpus_available,
|
||||
output_tp2 = do_sample(llm_tp2, tinyllama_lora_files, lora_id=1)
|
||||
|
||||
del llm_tp2
|
||||
-cleanup()
|
||||
+cleanup_dist_env_and_memory()
|
||||
|
||||
assert output_tp1 == output_tp2
|
||||
|
||||
Reference in New Issue
Block a user