[V0 deprecation] Remove VLLM_USE_V1 usage in most modules (#27955)

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
2025-11-05 12:51:16 +08:00
parent 878fd5a16f
commit 428bc7bf1c
19 changed files with 107 additions and 238 deletions
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -220,14 +220,8 @@ async def build_async_engine_client_from_engine_args(
    # Create the EngineConfig (determines if we can use V1).
    vllm_config = engine_args.create_engine_config(usage_context=usage_context)

-    # V1 AsyncLLM.
-    assert envs.VLLM_USE_V1
-
    if disable_frontend_multiprocessing:
-        logger.warning(
-            "V1 is enabled, but got --disable-frontend-multiprocessing. "
-            "To disable frontend multiprocessing, set VLLM_USE_V1=0."
-        )
+        logger.warning("V1 is enabled, but got --disable-frontend-multiprocessing.")

    from vllm.v1.engine.async_llm import AsyncLLM