diff --git a/vllm/v1/worker/gpu_worker.py b/vllm/v1/worker/gpu_worker.py index dc70ce5c7..d9411387e 100644 --- a/vllm/v1/worker/gpu_worker.py +++ b/vllm/v1/worker/gpu_worker.py @@ -330,7 +330,7 @@ class Worker(WorkerBase): free_gpu_memory = profile_result.after_profile.free_memory # NOTE(woosuk): Here we assume that the other processes using the same # GPU did not change their memory usage during the profiling. - assert self.init_snapshot.free_memory > free_gpu_memory, ( + assert self.init_snapshot.free_memory >= free_gpu_memory, ( "Error in memory profiling. " f"Initial free memory {format_gib(self.init_snapshot.free_memory)} GiB, " f"current free memory {format_gib(free_gpu_memory)} GiB. "