[Hardware][Intel] Add CPU inference backend (#3634)

Co-authored-by: Kunshang Ji <kunshang.ji@intel.com> Co-authored-by: Yuan Zhou <yuan.zhou@intel.com>
2024-04-02 13:07:30 +08:00
parent eb69d68804
commit 0e3f06fe9c
24 changed files with 2747 additions and 5 deletions
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -178,6 +178,9 @@ class LLMEngine:
        if device_config.device_type == "neuron":
            from vllm.executor.neuron_executor import NeuronExecutor
            executor_class = NeuronExecutor
+        elif device_config.device_type == "cpu":
+            from vllm.executor.cpu_executor import CPUExecutor
+            executor_class = CPUExecutor
        elif parallel_config.worker_use_ray:
            initialize_ray_cluster(parallel_config)
            from vllm.executor.ray_gpu_executor import RayGPUExecutor