[Model Runner V2] Optimize CUDA graph capture time (#29275)
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
@@ -313,6 +313,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
|
||||
return 0
|
||||
|
||||
start_time = time.perf_counter()
|
||||
gc.collect()
|
||||
torch.cuda.empty_cache()
|
||||
start_free_gpu_memory = torch.cuda.mem_get_info()[0]
|
||||
|
||||
|
||||
Reference in New Issue
Block a user