[Hardware][NIXL] Set the default kv buffer device type for different platforms (#36438)
Signed-off-by: zhenwei-intel <zhenwei.liu@intel.com>
Co-authored-by: Kunshang Ji <kunshang.ji@intel.com>
This commit is contained in:
@@ -24,9 +24,9 @@ class KVTransferConfig:
|
||||
engine_id: str | None = None
|
||||
"""The engine id for KV transfers."""
|
||||
|
||||
kv_buffer_device: str = "cuda"
|
||||
"""The device used by kv connector to buffer the KV cache. Choices are
|
||||
'cuda' and 'cpu'."""
|
||||
kv_buffer_device: str | None = None
|
||||
"""The device used by kv connector to buffer the KV cache. Choices are
|
||||
'cuda','cpu' and 'xpu'."""
|
||||
|
||||
kv_buffer_size: float = 1e9
|
||||
"""The buffer size for TorchDistributedConnector. Measured in number of
|
||||
@@ -100,6 +100,11 @@ class KVTransferConfig:
|
||||
f"is set, supported roles are {get_args(KVRole)}"
|
||||
)
|
||||
|
||||
if self.kv_buffer_device is None:
|
||||
from vllm.platforms import current_platform
|
||||
|
||||
self.kv_buffer_device = current_platform.device_type
|
||||
|
||||
@property
def is_kv_transfer_instance(self) -> bool:
    """Tell whether this config describes a KV-transfer instance.

    Returns:
        True only when a KV connector is set (``kv_connector`` is not
        ``None``) and ``kv_role`` is one of the values listed by
        ``get_args(KVRole)``; False otherwise.
    """
    # Without a connector the role is irrelevant, so bail out early.
    if self.kv_connector is None:
        return False
    return self.kv_role in get_args(KVRole)
|
||||
|
||||
Reference in New Issue
Block a user