[CPU] V1 support for the CPU backend (#16441)

2025-06-04 09:43:01 +08:00
parent 52dceb172d
commit 4555143ea7
15 changed files with 465 additions and 40 deletions
--- a/vllm/compilation/wrapper.py
+++ b/vllm/compilation/wrapper.py
@@ -41,11 +41,16 @@ class TorchCompileWrapperWithCustomDispatcher:
            # compiling the forward method

            backend = vllm_config.compilation_config.init_backend(vllm_config)
+            options = None
+            if isinstance(backend, str) and backend == "inductor":
+                options = get_current_vllm_config(
+                ).compilation_config.inductor_compile_config

            compiled_callable = torch.compile(
                self.forward,
                fullgraph=envs.VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE,
-                backend=backend)
+                backend=backend,
+                options=options)

        self.compiled_callable = compiled_callable
        self.original_code_object = self.__class__.forward.__code__