[CI/Test] improve robustness of test (hf_runner) (#5347)

[CI/Test] Improve test robustness by replacing `del` with a context manager (hf_runner) (#5347)
This commit is contained in:
youkaichao
2024-06-07 22:31:32 -07:00
committed by GitHub
parent c96fc06747
commit 9fb900f90c
14 changed files with 48 additions and 61 deletions

View File

@@ -116,16 +116,14 @@ def test_deserialized_encrypted_vllm_model_has_same_outputs(
def test_deserialized_hf_model_has_same_outputs(hf_runner, vllm_runner,
tmp_path):
hf_model = hf_runner(model_ref)
model_path = tmp_path / (model_ref + ".tensors")
max_tokens = 50
outputs = hf_model.generate_greedy(prompts, max_tokens=max_tokens)
with open_stream(model_path, "wb+") as stream:
serializer = TensorSerializer(stream)
serializer.write_module(hf_model.model)
del hf_model
gc.collect()
torch.cuda.empty_cache()
with hf_runner(model_ref) as hf_model:
model_path = tmp_path / (model_ref + ".tensors")
max_tokens = 50
outputs = hf_model.generate_greedy(prompts, max_tokens=max_tokens)
with open_stream(model_path, "wb+") as stream:
serializer = TensorSerializer(stream)
serializer.write_module(hf_model.model)
loaded_hf_model = vllm_runner(model_ref,
load_format="tensorizer",
model_loader_extra_config=TensorizerConfig(