[V0 Deprecation] Remove VLLM_USE_V1 from tests (#26341)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
@@ -66,7 +66,6 @@ def llm_pair(request):
         pytest.skip("Only Blackwell GPUs support Cutlass MLA")
 
     env_vars = {
-        "VLLM_USE_V1": "1",
         # Force native sampler to avoid potential nondeterminism in FlashInfer
         # when per-request generators are not used in V1.
         "VLLM_USE_FLASHINFER_SAMPLER": "0",
@@ -161,7 +160,6 @@ def test_full_cudagraph_with_invalid_backend():
     with (
         temporary_environ(
             {
-                "VLLM_USE_V1": "1",
                 "VLLM_ATTENTION_BACKEND": "FLEX_ATTENTION",
                 # Flex_Attention is not supported with full cuda graph
             }
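Note on the helper used in the second hunk: temporary_environ applies a dict of environment variables for the duration of a with-block. Below is a minimal sketch of that pattern, assuming the helper only sets the variables and restores their previous values on exit; the actual implementation in the vLLM test suite may differ.

import contextlib
import os


@contextlib.contextmanager
def temporary_environ(env_vars):
    """Set env_vars for the duration of the block, then restore old values."""
    # Remember previous values; None means the variable was previously unset.
    old_values = {key: os.environ.get(key) for key in env_vars}
    try:
        os.environ.update(env_vars)
        yield
    finally:
        for key, old in old_values.items():
            if old is None:
                os.environ.pop(key, None)
            else:
                os.environ[key] = old

After this commit, the test only needs the attention backend override, e.g.:

with temporary_environ({"VLLM_ATTENTION_BACKEND": "FLEX_ATTENTION"}):
    ...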