[Bugfix] Fix CUDA event usage with CPU model runner (#23643)

Signed-off-by: jiang1.li <jiang1.li@intel.com>
2025-08-27 01:10:42 +08:00
parent 44ac25eae2
commit 9b0187003e
2 changed files with 26 additions and 4 deletions
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -321,7 +321,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
            (self.max_model_len, 1),
            dtype=torch.int64,
            device="cpu",
-            pin_memory=True)
+            pin_memory=self.pin_memory)

    def _make_buffer(self, *args, dtype: torch.dtype) -> CpuGpuBuffer:
        return CpuGpuBuffer(*args,