[Bugfix] Fix cuda event usage with CPU model runner (#23643)
Signed-off-by: jiang1.li <jiang1.li@intel.com>
This commit is contained in:
@@ -321,7 +321,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
|
||||
(self.max_model_len, 1),
|
||||
dtype=torch.int64,
|
||||
device="cpu",
|
||||
pin_memory=True)
|
||||
pin_memory=self.pin_memory)
|
||||
|
||||
def _make_buffer(self, *args, dtype: torch.dtype) -> CpuGpuBuffer:
|
||||
return CpuGpuBuffer(*args,
|
||||
|
||||
Reference in New Issue
Block a user