[MISC] Consolidate cleanup() and refactor offline_inference_with_prefix.py (#9510)

This commit is contained in:
Cody Yu
2024-10-18 14:30:55 -07:00
committed by GitHub
parent 9bb10a7d27
commit d11bf435a0
20 changed files with 84 additions and 105 deletions

View File

@@ -6,11 +6,10 @@ from typing import List
import pytest
import vllm
from vllm.distributed import cleanup_dist_env_and_memory
from vllm.lora.request import LoRARequest
from vllm.utils import is_hip
from .conftest import cleanup
@dataclass
class ModelWithQuantization:
@@ -160,7 +159,7 @@ def test_quant_model_lora(tinyllama_lora_files, num_gpus_available, model,
print("removing lora")
del llm
cleanup()
cleanup_dist_env_and_memory()
@pytest.mark.parametrize("model", MODELS)
@@ -181,7 +180,7 @@ def test_quant_model_tp_equality(tinyllama_lora_files, num_gpus_available,
output_tp1 = do_sample(llm_tp1, tinyllama_lora_files, lora_id=1)
del llm_tp1
cleanup()
cleanup_dist_env_and_memory()
llm_tp2 = vllm.LLM(
model=model.model_path,
@@ -194,6 +193,6 @@ def test_quant_model_tp_equality(tinyllama_lora_files, num_gpus_available,
output_tp2 = do_sample(llm_tp2, tinyllama_lora_files, lora_id=1)
del llm_tp2
cleanup()
cleanup_dist_env_and_memory()
assert output_tp1 == output_tp2