[MISC] Consolidate cleanup() and refactor offline_inference_with_prefix.py (#9510)

Cody Yu
2024-10-18 14:30:55 -07:00
committed by GitHub
parent 9bb10a7d27
commit d11bf435a0
20 changed files with 84 additions and 105 deletions


@@ -1,6 +1,7 @@
 import sys
 
 from vllm import LLM, SamplingParams
+from vllm.distributed import cleanup_dist_env_and_memory
 
 
 def test_lazy_outlines(sample_regex):
@@ -14,6 +15,7 @@ def test_lazy_outlines(sample_regex):
     ]
     sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
 
+    # Create an LLM without guided decoding as a baseline.
     llm = LLM(model="facebook/opt-125m",
               enforce_eager=True,
               gpu_memory_utilization=0.3)
@@ -26,8 +28,11 @@ def test_lazy_outlines(sample_regex):
     # make sure outlines is not imported
     assert 'outlines' not in sys.modules
 
-    # The second LLM needs to request a higher gpu_memory_utilization because
-    # the first LLM has already allocated a full 30% of the gpu memory.
+    # Destroy the LLM object and free up the GPU memory.
+    del llm
+    cleanup_dist_env_and_memory()
+
+    # Create an LLM with guided decoding enabled.
     llm = LLM(model="facebook/opt-125m",
               enforce_eager=True,
               guided_decoding_backend="lm-format-enforcer",
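
For context, here is a minimal sketch of the pattern the diff above establishes. The model name, LLM parameters, and cleanup_dist_env_and_memory are taken from the diff; the single-prompt list is illustrative. Instead of asking the second LLM for a larger gpu_memory_utilization, the first LLM is deleted and its distributed state and GPU memory are released before the next one is constructed.

# Sketch of the consolidated cleanup pattern (assumes a CUDA-capable GPU;
# the prompt list below is illustrative, not from the commit).
from vllm import LLM, SamplingParams
from vllm.distributed import cleanup_dist_env_and_memory

prompts = ["Hello, my name is"]
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

# First LLM: plain generation, capped at 30% of GPU memory.
llm = LLM(model="facebook/opt-125m",
          enforce_eager=True,
          gpu_memory_utilization=0.3)
llm.generate(prompts, sampling_params)

# Drop the object and release the distributed environment and GPU memory
# so the next LLM can reuse the same 30% budget.
del llm
cleanup_dist_env_and_memory()

# Second LLM: same memory budget, now with guided decoding enabled.
llm = LLM(model="facebook/opt-125m",
          enforce_eager=True,
          guided_decoding_backend="lm-format-enforcer",
          gpu_memory_utilization=0.3)
llm.generate(prompts, sampling_params)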