🐛 fix torch memory profiling (#9516)

Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
2024-10-18 20:25:19 -05:00
parent 337ed76671
commit 380e18639f
3 changed files with 14 additions and 11 deletions
--- a/tests/quantization/test_bitsandbytes.py
+++ b/tests/quantization/test_bitsandbytes.py
@@ -107,8 +107,7 @@ def validate_generated_texts(hf_runner,
                     quantization='bitsandbytes',
                     load_format='bitsandbytes',
                     tensor_parallel_size=vllm_tp_size,
-                     enforce_eager=False,
-                     gpu_memory_utilization=0.8) as llm:
+                     enforce_eager=False) as llm:
        vllm_outputs = llm.generate_greedy(prompts, 8)
        vllm_logs = log_generated_texts(prompts, vllm_outputs, "VllmRunner")