[Model Runner V2] Do not error on attention backends (#32820)
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
@@ -247,16 +247,6 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
             self.block_tables,
         )

-        # TODO(woosuk): Support other backends.
-        supported_backends = ("FLASH_ATTN", "FLASHINFER", "FLASHINFER_MLA")
-        for backend in self.attn_backends.values():
-            backend_name = backend.get_name()
-            if backend_name not in supported_backends:
-                raise NotImplementedError(
-                    f"The {backend_name} attention backend is not supported yet. "
-                    f"Supported backends are: {supported_backends}."
-                )
-
         self.kv_caches: list[torch.Tensor] = []
         init_kv_cache(
             self.kv_caches,
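For context, the deleted block implemented a hard allow-list on attention backends, and this commit simply drops it so Model Runner V2 no longer errors out for other backends. The following is a minimal, self-contained sketch of that pattern in isolation; only the backend names and the get_name() accessor come from the diff, while the _Backend class, the check_backends helper, and the example name TRITON_ATTN are hypothetical stand-ins, not vLLM APIs.

# Minimal sketch of the allow-list check removed above (assumptions noted
# in comments; _Backend and check_backends are illustrative, not vLLM code).

SUPPORTED_BACKENDS = ("FLASH_ATTN", "FLASHINFER", "FLASHINFER_MLA")


class _Backend:
    """Stand-in for an attention backend object exposing get_name()."""

    def __init__(self, name: str) -> None:
        self._name = name

    def get_name(self) -> str:
        return self._name


def check_backends(attn_backends: dict[str, _Backend]) -> None:
    """Old behavior: reject any backend outside the allow-list."""
    for backend in attn_backends.values():
        name = backend.get_name()
        if name not in SUPPORTED_BACKENDS:
            raise NotImplementedError(
                f"The {name} attention backend is not supported yet. "
                f"Supported backends are: {SUPPORTED_BACKENDS}."
            )


backends = {"layers.0.attn": _Backend("TRITON_ATTN")}  # hypothetical name
try:
    check_backends(backends)  # old path: raises for any unlisted backend
except NotImplementedError as exc:
    print(exc)
# After this commit the check is gone entirely, so runner initialization
# proceeds regardless of the backend name.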
||||