[Hardware][Intel] Isolate CPUModelRunner and ModelRunner for better maintenance (#3824)

2024-04-12 02:56:49 +08:00
parent 08ccee1e83
commit 8afca50889
5 changed files with 443 additions and 61 deletions
--- a/vllm/executor/cpu_executor.py
+++ b/vllm/executor/cpu_executor.py
@@ -25,6 +25,7 @@ class CPUExecutor(ExecutorBase):
        assert lora_config is None, "cpu backend doesn't support LoRA"
        model_config = _verify_and_get_model_config(model_config)
        cache_config = _verify_and_get_cache_config(cache_config)
+        scheduler_config = _verify_and_get_scheduler_config(scheduler_config)

        self.model_config = model_config
        self.cache_config = cache_config
@@ -116,6 +117,15 @@ def _verify_and_get_model_config(config: ModelConfig) -> ModelConfig:
    return config


+def _verify_and_get_scheduler_config(  # CPU-side check of scheduler options
+        config: SchedulerConfig) -> SchedulerConfig:
+    if config.chunked_prefill_enabled:  # requested, but unsupported on CPU
+        logger.warning("Chunked prefill is not supported on CPU, disable it.")
+        config.chunked_prefill_enabled = False  # mutates the argument
+    # Hand back the same object that came in; no copy is made.
+    return config
+
+
 def _verify_and_get_cache_config(config: CacheConfig) -> CacheConfig:
    _GB = 1 << 30
    if config.enable_prefix_caching: