🐛 fix torch memory profiling (#9516)
Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
This commit is contained in:
@@ -107,8 +107,7 @@ def validate_generated_texts(hf_runner,
|
||||
quantization='bitsandbytes',
|
||||
load_format='bitsandbytes',
|
||||
tensor_parallel_size=vllm_tp_size,
|
||||
enforce_eager=False,
|
||||
gpu_memory_utilization=0.8) as llm:
|
||||
enforce_eager=False) as llm:
|
||||
vllm_outputs = llm.generate_greedy(prompts, 8)
|
||||
vllm_logs = log_generated_texts(prompts, vllm_outputs, "VllmRunner")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user