[Bugfix][Core] Use torch.cuda.memory_stats() to profile peak memory usage (#9352)

Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
This commit is contained in:
Joe Runde
2024-10-17 21:47:27 -05:00
committed by GitHub
parent 48138a8415
commit de4008e2ab
4 changed files with 122 additions and 17 deletions

View File

@@ -44,7 +44,7 @@ def test_offline_mode(llm: LLM, monkeypatch):
LLM(model=MODEL_NAME,
max_num_batched_tokens=4096,
tensor_parallel_size=1,
-                gpu_memory_utilization=0.10,
+                gpu_memory_utilization=0.20,
enforce_eager=True)
finally:
# Reset the environment after the test