[misc] improve memory profiling (#11809)
Signed-off-by: youkaichao <youkaichao@gmail.com> Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
This commit is contained in:
@@ -1742,10 +1742,10 @@ class MemorySnapshot:
|
||||
timestamp: float = 0.0
|
||||
|
||||
def measure(self) -> None:
    """Record the current PyTorch CUDA memory counters and a timestamp.

    Sets three attributes on this snapshot:

    * ``torch_peak_in_bytes`` from ``torch.cuda.max_memory_reserved()``
    * ``torch_memory_in_bytes`` from ``torch.cuda.memory_reserved()``
    * ``timestamp`` from ``time.time()``
    """
    # Only the reserved-memory counters are read. Stores taken from
    # torch.cuda.memory_stats()["allocated_bytes.all.*"] would be
    # overwritten immediately by the assignments below, so they are not
    # queried at all.
    self.torch_peak_in_bytes = torch.cuda.max_memory_reserved()
    # torch.cuda.memory_reserved() is how many bytes
    # PyTorch gets from cuda (by calling cudaMalloc, etc.)
    self.torch_memory_in_bytes = torch.cuda.memory_reserved()
    self.timestamp = time.time()
|
||||
|
||||
def __sub__(self, other: "MemorySnapshot") -> "MemorySnapshot":
|
||||
@@ -1822,10 +1822,10 @@ def memory_profiling(
|
||||
|
||||
The memory used for loading weights (a.) is directly given from the argument `weights_memory_in_bytes`.
|
||||
|
||||
The increase of ``torch.cuda.memory_stats()["allocated_bytes.all.peak"]` after profiling gives (b.).
|
||||
The increase of `torch.cuda.memory_stats()["allocated_bytes.all.peak"]` after profiling gives (b.).
|
||||
|
||||
(c.) is tricky. We measure the total memory used in this GPU (`torch.cuda.mem_get_info()[1] - torch.cuda.mem_get_info()[0]`),
|
||||
subtract the baseline memory, the memory used by the model weights, and diff of `torch.cuda.memory_stats()["allocated_bytes.all.current"]`.
|
||||
subtract the baseline memory, the memory used by the model weights, and diff of `torch.cuda.memory_reserved()`.
|
||||
""" # noqa
|
||||
torch.cuda.reset_peak_memory_stats()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user