Signed-off-by: Remy <eunhwan.shin@dtonic.io> Co-authored-by: Li, Jiang <jiang1.li@intel.com>
This commit is contained in:
@@ -3665,7 +3665,8 @@ class VllmConfig:
|
||||
# The logger should only print a warning message for hybrid models. Since we
|
||||
# can't yet know whether the model is hybrid at this point, we don't log the
|
||||
# warning message here and will log it later.
|
||||
if not (current_platform.is_cuda() or current_platform.is_rocm()):
|
||||
if not (current_platform.is_cuda() or current_platform.is_rocm()
|
||||
or current_platform.is_cpu()):
|
||||
# Hybrid KV cache manager is only supported on CUDA, ROCm and CPU.
|
||||
self.scheduler_config.disable_hybrid_kv_cache_manager = True
|
||||
if self.kv_transfer_config is not None:
|
||||
|
||||
Reference in New Issue
Block a user