[CI/Test] improve robustness of test (hf_runner) (#5347)

[CI/Test] Improve the robustness of the tests by replacing `del` with a context manager (hf_runner) (#5347)
This commit is contained in:
youkaichao
2024-06-07 22:31:32 -07:00
committed by GitHub
parent c96fc06747
commit 9fb900f90c
14 changed files with 48 additions and 61 deletions

View File

@@ -43,9 +43,8 @@ def test_chunked_prefill_recompute(
enable_chunked_prefill = True
max_num_batched_tokens = chunked_prefill_token_size
hf_model = hf_runner(model, dtype=dtype)
hf_outputs = hf_model.generate_greedy(example_prompts, max_tokens)
del hf_model
with hf_runner(model, dtype=dtype) as hf_model:
hf_outputs = hf_model.generate_greedy(example_prompts, max_tokens)
vllm_model = vllm_runner(
model,
@@ -82,9 +81,8 @@ def test_preemption(
) -> None:
"""By default, recompute preemption is enabled"""
hf_model = hf_runner(model, dtype=dtype)
hf_outputs = hf_model.generate_greedy(example_prompts, max_tokens)
del hf_model
with hf_runner(model, dtype=dtype) as hf_model:
hf_outputs = hf_model.generate_greedy(example_prompts, max_tokens)
vllm_model = vllm_runner(
model,
@@ -137,10 +135,9 @@ def test_swap(
) -> None:
"""Use beam search enables swapping."""
example_prompts = example_prompts[:1]
hf_model = hf_runner(model, dtype=dtype)
hf_outputs = hf_model.generate_beam_search(example_prompts, beam_width,
max_tokens)
del hf_model
with hf_runner(model, dtype=dtype) as hf_model:
hf_outputs = hf_model.generate_beam_search(example_prompts, beam_width,
max_tokens)
vllm_model = vllm_runner(
model,