🐛 fix torch memory profiling (#9516)

Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
This commit is contained in:
Joe Runde
2024-10-18 20:25:19 -05:00
committed by GitHub
parent 337ed76671
commit 380e18639f
3 changed files with 14 additions and 11 deletions

View File

@@ -107,8 +107,7 @@ def validate_generated_texts(hf_runner,
quantization='bitsandbytes',
load_format='bitsandbytes',
tensor_parallel_size=vllm_tp_size,
-enforce_eager=False,
-gpu_memory_utilization=0.8) as llm:
+enforce_eager=False) as llm:
vllm_outputs = llm.generate_greedy(prompts, 8)
vllm_logs = log_generated_texts(prompts, vllm_outputs, "VllmRunner")