[V0 deprecation] Remove _VLLM_V1 suffixes from attention backend names (#25489)

Signed-off-by: Matthew Bonanni <mbonanni@redhat.com> Signed-off-by: Matthew Bonanni <mbonanni001@gmail.com>
2025-09-25 13:37:50 -04:00
parent 71b25b0d48
commit 3468f17ebe
42 changed files with 131 additions and 174 deletions
--- a/tests/entrypoints/openai/test_serving_chat.py
+++ b/tests/entrypoints/openai/test_serving_chat.py
@@ -68,7 +68,7 @@ def default_server_args(with_tool_parser: bool):
 def gptoss_server(monkeypatch_module: pytest.MonkeyPatch,
                  default_server_args: list[str]):
    with monkeypatch_module.context() as m:
-        m.setenv("VLLM_ATTENTION_BACKEND", "TRITON_ATTN_VLLM_V1")
+        m.setenv("VLLM_ATTENTION_BACKEND", "TRITON_ATTN")
        with RemoteOpenAIServer(GPT_OSS_MODEL_NAME,
                                default_server_args) as remote_server:
            yield remote_server