[Bugfix] Guard _may_reorder_batch for encoder-only models on CPU (#24319) (#24348)

Signed-off-by: Remy <eunhwan.shin@dtonic.io> Co-authored-by: Li, Jiang <jiang1.li@intel.com>
2025-09-10 15:24:42 +09:00
parent 91130ae376
commit feaf202e93
3 changed files with 26 additions and 7 deletions
--- a/vllm/config/__init__.py
+++ b/vllm/config/__init__.py
@@ -3665,7 +3665,8 @@ class VllmConfig:
            # logger should only print warning message for hybrid models. As we
            # can't know whether the model is hybrid or not now, so we don't log
            # warning message here and will log it later.
-            if not (current_platform.is_cuda() or current_platform.is_rocm()):
+            if not (current_platform.is_cuda() or current_platform.is_rocm()
+                    or current_platform.is_cpu()):
                # Hybrid KV cache manager is not supported on non-GPU platforms.
                self.scheduler_config.disable_hybrid_kv_cache_manager = True
            if self.kv_transfer_config is not None: