[Refactor] Relocate tests from tests/v1/entrypoints/ to tests/entrypoints/ (#37500)

Signed-off-by: sfeng33 <4florafeng@gmail.com>
2026-03-20 05:50:34 -04:00
parent 6050b93bed
commit b4c1aef21c
16 changed files with 171 additions and 235 deletions
--- a/.buildkite/scripts/hardware_ci/run-tpu-v1-test-part2.sh
+++ b/.buildkite/scripts/hardware_ci/run-tpu-v1-test-part2.sh
@@ -127,7 +127,7 @@ run_and_track_test() {

 # --- Actual Test Execution ---
 run_and_track_test 1 "test_struct_output_generate.py" \
-    "python3 -m pytest -s -v /workspace/vllm/tests/v1/entrypoints/llm/test_struct_output_generate.py -k \"not test_structured_output_with_reasoning_matrices\""
+    "python3 -m pytest -s -v /workspace/vllm/tests/entrypoints/llm/test_struct_output_generate.py -k \"not test_structured_output_with_reasoning_matrices\""
 run_and_track_test 2 "test_moe_pallas.py" \
    "python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_moe_pallas.py"
 run_and_track_test 3 "test_lora.py" \
--- a/.buildkite/test-amd.yaml
+++ b/.buildkite/test-amd.yaml
@@ -483,19 +483,6 @@ steps:
    - pytest -v -s v1/e2e/spec_decode/test_spec_decode.py -k "tensor_parallelism"


- label: Entrypoints V1 # TBD
-  timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_1
-  optional: true
-  working_dir: "/vllm-workspace/tests"
-  source_file_dependencies:
-  - vllm/
-  - tests/v1
-  commands:
-  - pytest -v -s v1/entrypoints
-
-
 - label: V1 Sample + Logits # TBD
  timeout_in_minutes: 60
  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
@@ -1173,14 +1160,14 @@ steps:
  - vllm/v1/engine/
  - vllm/v1/worker/
  - tests/v1/distributed
-  - tests/v1/entrypoints/openai/test_multi_api_servers.py
+  - tests/entrypoints/openai/test_multi_api_servers.py
  - vllm/platforms/rocm.py
  commands:
  - export TORCH_NCCL_BLOCKING_WAIT=1
  - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_async_llm_dp.py
  - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_eagle_dp.py
  - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_external_lb_dp.py
-  - DP_SIZE=2 pytest -v -s v1/entrypoints/openai/test_multi_api_servers.py
+  - DP_SIZE=2 pytest -v -s entrypoints/openai/test_multi_api_servers.py


 - label: Distributed Compile + RPC Tests (2 GPUs) # TBD
@@ -1766,19 +1753,6 @@ steps:
    - pytest -v -s v1/e2e/spec_decode/test_spec_decode.py -k "eagle_correctness_heavy"


- label: Entrypoints V1 # 25.7m
-  timeout_in_minutes: 45
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
-  optional: true
-  working_dir: "/vllm-workspace/tests"
-  source_file_dependencies:
-  - vllm/
-  - tests/v1
-  commands:
-  - pytest -v -s v1/entrypoints
-
-
 - label: V1 Spec Decode # TBD
  timeout_in_minutes: 40
  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
@@ -2391,14 +2365,14 @@ steps:
  - vllm/v1/engine/
  - vllm/v1/worker/
  - tests/v1/distributed
-  - tests/v1/entrypoints/openai/test_multi_api_servers.py
+  - tests/entrypoints/openai/test_multi_api_servers.py
  - vllm/platforms/rocm.py
  commands:
  - export TORCH_NCCL_BLOCKING_WAIT=1
  - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_async_llm_dp.py
  - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_eagle_dp.py
  - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_external_lb_dp.py
-  - DP_SIZE=2 pytest -v -s v1/entrypoints/openai/test_multi_api_servers.py
+  - DP_SIZE=2 pytest -v -s entrypoints/openai/test_multi_api_servers.py


 - label: Distributed Compile + RPC Tests (2 GPUs) # 56.1m
--- a/.buildkite/test_areas/distributed.yaml
+++ b/.buildkite/test_areas/distributed.yaml
@@ -27,14 +27,14 @@ steps:
  - vllm/v1/engine/
  - vllm/v1/worker/
  - tests/v1/distributed
-  - tests/v1/entrypoints/openai/test_multi_api_servers.py
+  - tests/entrypoints/openai/test_multi_api_servers.py
  commands:
  # https://github.com/NVIDIA/nccl/issues/1838
  - export NCCL_CUMEM_HOST_ENABLE=0
  - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_async_llm_dp.py
  - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_eagle_dp.py
  - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_external_lb_dp.py
-  - DP_SIZE=2 pytest -v -s v1/entrypoints/openai/test_multi_api_servers.py
+  - DP_SIZE=2 pytest -v -s entrypoints/openai/test_multi_api_servers.py

 - label: Distributed Compile + RPC Tests (2 GPUs)
  timeout_in_minutes: 20
--- a/.buildkite/test_areas/entrypoints.yaml
+++ b/.buildkite/test_areas/entrypoints.yaml
@@ -34,7 +34,7 @@ steps:
  - tests/entrypoints/test_chat_utils
  commands:
  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/chat_completion/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/chat_completion/test_oot_registration.py --ignore=entrypoints/openai/completion/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/  --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses
+  - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/chat_completion/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/chat_completion/test_oot_registration.py --ignore=entrypoints/openai/completion/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/  --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses --ignore=entrypoints/openai/test_multi_api_servers.py
  - pytest -v -s entrypoints/test_chat_utils.py
  mirror:
    amd:
@@ -75,19 +75,6 @@ steps:
  commands:
  - pytest -v -s entrypoints/openai/responses

- label: Entrypoints V1
-  timeout_in_minutes: 50
-  source_file_dependencies:
-    - vllm/
-    - tests/v1
-  commands:
-    - pytest -v -s v1/entrypoints
-  mirror:
-    amd:
-      device: mi325_1
-      depends_on:
-      - image-build-amd
-
 - label: OpenAI API Correctness
  timeout_in_minutes: 30
  source_file_dependencies:
--- a/.buildkite/test_areas/model_runner_v2.yaml
+++ b/.buildkite/test_areas/model_runner_v2.yaml
@@ -11,7 +11,7 @@ steps:
  - vllm/v1/attention/
  - tests/v1/engine/test_llm_engine.py
  - tests/v1/e2e/
-  - tests/v1/entrypoints/llm/test_struct_output_generate.py
+  - tests/entrypoints/llm/test_struct_output_generate.py
  commands:
  - set -x
  - export VLLM_USE_V2_MODEL_RUNNER=1
@@ -22,7 +22,7 @@ steps:
  - pytest -v -s v1/e2e/general/test_context_length.py
  - pytest -v -s v1/e2e/general/test_min_tokens.py
  # Temporary hack filter to exclude ngram spec decoding based tests.
-  - pytest -v -s v1/entrypoints/llm/test_struct_output_generate.py -k "xgrammar and not speculative_config6 and not speculative_config7 and not speculative_config8 and not speculative_config0"
+  - pytest -v -s entrypoints/llm/test_struct_output_generate.py -k "xgrammar and not speculative_config6 and not speculative_config7 and not speculative_config8 and not speculative_config0"

 - label: Model Runner V2 Examples
  timeout_in_minutes: 45
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -75,7 +75,7 @@ CMakeLists.txt @tlrmchlsmth @LucasWilkinson
 /tests/multimodal @DarkLight1337 @ywang96 @NickLucche
 /tests/quantization @mgoin @robertgshaw2-redhat @yewentao256 @pavanimajety
 /tests/test_inputs.py @DarkLight1337 @ywang96
-/tests/v1/entrypoints/llm/test_struct_output_generate.py @mgoin @russellb @aarnphm
+/tests/entrypoints/llm/test_struct_output_generate.py @mgoin @russellb @aarnphm
 /tests/v1/structured_output @mgoin @russellb @aarnphm
 /tests/v1/core @WoosukKwon @robertgshaw2-redhat @njhill @ywang96 @alexm-redhat @heheda12345 @ApostaC @orozery
 /tests/weight_loading @mgoin @youkaichao @yewentao256
--- a/.github/mergify.yml
+++ b/.github/mergify.yml
@@ -260,7 +260,7 @@ pull_request_rules:
      - files=examples/offline_inference/structured_outputs.py
      - files=examples/online_serving/structured_outputs/structured_outputs.py
      - files~=^tests/v1/structured_output/
-      - files=tests/v1/entrypoints/llm/test_struct_output_generate.py
+      - files=tests/entrypoints/llm/test_struct_output_generate.py
      - files~=^vllm/v1/structured_output/
  actions:
    label:
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -6,9 +6,6 @@ from copy import deepcopy

 from tblib import pickling_support

-# Import fixture
-from tests.v1.entrypoints.conftest import sample_json_schema  # noqa
-
 # ruff: noqa

 # Install support for pickling exceptions so that we can nicely propagate
@@ -81,6 +78,55 @@ if TYPE_CHECKING:

 logger = init_logger(__name__)

+
+@pytest.fixture
+def sample_json_schema():
+    return {
+        "type": "object",
+        "properties": {
+            "name": {"type": "string"},
+            "age": {"type": "integer"},
+            "skills": {
+                "type": "array",
+                "items": {
+                    "type": "string",
+                },
+            },
+            "grade": {
+                "type": "string",
+                "pattern": "^[A-D]$",
+            },
+            "email": {
+                "type": "string",
+                "pattern": "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$",
+            },
+            "work_history": {
+                "type": "array",
+                "items": {
+                    "type": "object",
+                    "properties": {
+                        "company": {"type": "string"},
+                        "duration": {
+                            "type": "number",
+                            "minimum": 0.0,
+                            "maximum": 100.0,
+                        },
+                        "position": {"type": "string"},
+                    },
+                    "required": ["company", "duration", "position"],
+                    "additionalProperties": False,
+                },
+                "minItems": 0,
+                "maxItems": 3,
+            },
+        },
+        "required": ["name", "age", "skills", "grade", "email", "work_history"],
+        "additionalProperties": False,
+        "minProperties": 1,
+        "maxProperties": 10,
+    }
+
+
 _TEST_DIR = os.path.dirname(__file__)
 _TEST_PROMPTS = [os.path.join(_TEST_DIR, "prompts", "example.txt")]
 _LONG_PROMPTS = [os.path.join(_TEST_DIR, "prompts", "summary.txt")]
--- a/tests/v1/entrypoints/llm/test_struct_output_generate.py
+++ b/tests/entrypoints/llm/test_struct_output_generate.py
@@ -24,6 +24,108 @@ from vllm.sampling_params import (
    StructuredOutputsParams,
 )

+SAMPLE_REGEX = (
+    r"((25[0-5]|(2[0-4]|1\d|[1-9]|)\d)\.){3}"
+    r"(25[0-5]|(2[0-4]|1\d|[1-9]|)\d)"
+)
+
+# Note: Ensure this only uses attributes compatible with xgrammar
+SAMPLE_JSON_SCHEMA = {
+    "type": "object",
+    "properties": {
+        "name": {"type": "string"},
+        "age": {"type": "integer"},
+        "skills": {
+            "type": "array",
+            "items": {
+                "type": "string",
+            },
+        },
+        "grade": {
+            "type": "string",
+            "pattern": "^[A-D]$",  # Regex pattern
+        },
+        "email": {
+            "type": "string",
+            "pattern": "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$",
+        },
+        "work_history": {
+            "type": "array",
+            "items": {
+                "type": "object",
+                "properties": {
+                    "company": {"type": "string"},
+                    "duration": {
+                        "type": "number",
+                        "minimum": 0.0,
+                        "maximum": 100.0,  # Numeric range
+                    },
+                    "position": {"type": "string"},
+                },
+                "required": ["company", "duration", "position"],
+                "additionalProperties": False,
+            },
+            "minItems": 0,
+            "maxItems": 3,
+        },
+    },
+    "required": ["name", "age", "skills", "grade", "email", "work_history"],
+    "additionalProperties": False,
+    "minProperties": 1,
+    "maxProperties": 10,
+}
+
+# A schema unsupported by xgrammar
+UNSUPPORTED_JSON_SCHEMA = {
+    "type": "object",
+    "properties": {
+        "score": {
+            "type": "integer",
+            "multipleOf": 5,  # Numeric multiple
+        },
+        "tags": {
+            "type": "array",
+            "items": {"type": "string", "minLength": 10, "maxLength": 20},
+        },
+    },
+    "required": ["score", "tags"],
+    "additionalProperties": False,
+    "patternProperties": {
+        "^score$": {"type": "integer"},
+    },
+}
+
+SAMPLE_STRUCTURED_OUTPUTS_CHOICES = [
+    "Python",
+    "Java",
+    "JavaScript",
+    "C++",
+    "C#",
+    "PHP",
+    "TypeScript",
+    "Ruby",
+    "Swift",
+    "Kotlin",
+]
+
+SAMPLE_SQL_EBNF = """
+root ::= select_statement
+select_statement ::= "SELECT" column "from" table "where" condition
+column ::= "col_1" | "col_2"
+table ::= "table_1" | "table_2"
+condition ::= column "=" number
+number ::= "1" | "2"
+"""
+
+SAMPLE_SQL_LARK = """
+start: select_statement
+select_statement: "SELECT" column "from" table "where" condition
+column: "col_1" | "col_2"
+table: "table_1" | "table_2"
+condition: column "=" number
+number: "1" | "2"
+"""
+
 NGRAM_SPEC_CONFIG = {
    "model": "[ngram]",
    "num_speculative_tokens": 5,
@@ -110,17 +212,17 @@ class CarDescription(BaseModel):
    PARAMS_MODELS_BACKENDS_TOKENIZER_MODE,
 )
 def test_structured_output(
-    sample_json_schema: dict[str, Any],
-    unsupported_json_schema: dict[str, Any],
-    sample_sql_ebnf: str,
-    sample_sql_lark: str,
-    sample_regex: str,
-    sample_structured_outputs_choices: str,
    backend: str,
    tokenizer_mode: str,
    model_name: str,
    speculative_config: dict[str, Any],
 ):
+    sample_json_schema = SAMPLE_JSON_SCHEMA
+    unsupported_json_schema = UNSUPPORTED_JSON_SCHEMA
+    sample_sql_ebnf = SAMPLE_SQL_EBNF
+    sample_sql_lark = SAMPLE_SQL_LARK
+    sample_regex = SAMPLE_REGEX
+    sample_structured_outputs_choices = SAMPLE_STRUCTURED_OUTPUTS_CHOICES
    if current_platform.is_tpu() and speculative_config:
        pytest.skip("TPU does not support speculative decoding")

@@ -702,10 +804,10 @@ def test_structured_output_with_reasoning_matrices(

 @pytest.mark.parametrize("model_name, tokenizer_mode", PARAMS_MODELS_TOKENIZER_MODE)
 def test_structured_output_auto_mode(
-    unsupported_json_schema: dict[str, Any],
    model_name: str,
    tokenizer_mode: str,
 ):
+    unsupported_json_schema = UNSUPPORTED_JSON_SCHEMA
    llm = LLM(
        model=model_name,
        max_model_len=1024,
@@ -808,9 +910,9 @@ def test_guidance_no_additional_properties():

 @pytest.mark.parametrize("backend", ["guidance", "xgrammar", "outlines"])
 def test_structured_output_batched_with_non_structured_outputs_requests(
-    sample_json_schema: dict[str, Any],
    backend: str,
 ):
+    sample_json_schema = SAMPLE_JSON_SCHEMA
    # Don't use eager execution on TPUs because we want to test for no
    # recompilation at runtime
    enforce_eager = bool(not current_platform.is_tpu())
--- a/tests/entrypoints/openai/chat_completion/test_chat_completion.py
+++ b/tests/entrypoints/openai/chat_completion/test_chat_completion.py
--- a/tests/entrypoints/openai/chat_completion/test_completion_with_image_embeds.py
+++ b/tests/entrypoints/openai/chat_completion/test_completion_with_image_embeds.py
--- a/tests/entrypoints/openai/completion/test_completion.py
+++ b/tests/entrypoints/openai/completion/test_completion.py
--- a/tests/v1/entrypoints/openai/test_multi_api_servers.py
+++ b/tests/entrypoints/openai/test_multi_api_servers.py
--- a/tests/v1/entrypoints/__init__.py
+++ b/tests/v1/entrypoints/__init__.py
--- a/tests/v1/entrypoints/conftest.py
+++ b/tests/v1/entrypoints/conftest.py
@@ -1,173 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-import pytest
-
-
-@pytest.fixture
-def sample_prompts():
-    return [
-        "Hello, my name is",
-        "The president of the United States is",
-        "The capital of France is",
-        "The future of AI is",
-    ]
-
-
-@pytest.fixture
-def sample_token_ids():
-    return [
-        [0],
-        [0, 1],
-        [0, 2, 1],
-        [0, 3, 1, 2],
-    ]
-
-
-@pytest.fixture
-def sample_regex():
-    return (
-        r"((25[0-5]|(2[0-4]|1\d|[1-9]|)\d)\.){3}"
-        r"(25[0-5]|(2[0-4]|1\d|[1-9]|)\d)"
-    )
-
-
-# Note: Ensure this only uses attributes compatible with xgrammar
-@pytest.fixture
-def sample_json_schema():
-    return {
-        "type": "object",
-        "properties": {
-            "name": {"type": "string"},
-            "age": {"type": "integer"},
-            "skills": {
-                "type": "array",
-                "items": {
-                    "type": "string",
-                },
-            },
-            "grade": {
-                "type": "string",
-                "pattern": "^[A-D]$",  # Regex pattern
-            },
-            "email": {
-                "type": "string",
-                "pattern": "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$",
-            },
-            "work_history": {
-                "type": "array",
-                "items": {
-                    "type": "object",
-                    "properties": {
-                        "company": {"type": "string"},
-                        "duration": {
-                            "type": "number",
-                            "minimum": 0.0,
-                            "maximum": 100.0,  # Numeric range
-                        },
-                        "position": {"type": "string"},
-                    },
-                    "required": ["company", "duration", "position"],
-                    "additionalProperties": False,
-                },
-                "minItems": 0,
-                "maxItems": 3,
-            },
-        },
-        "required": ["name", "age", "skills", "grade", "email", "work_history"],
-        "additionalProperties": False,
-        "minProperties": 1,
-        "maxProperties": 10,
-    }
-
-
-# A schema unsupported by xgrammar
-@pytest.fixture
-def unsupported_json_schema():
-    return {
-        "type": "object",
-        "properties": {
-            "score": {
-                "type": "integer",
-                "multipleOf": 5,  # Numeric multiple
-            },
-            "tags": {
-                "type": "array",
-                "items": {"type": "string", "minLength": 10, "maxLength": 20},
-            },
-        },
-        "required": ["score", "tags"],
-        "additionalProperties": False,
-        "patternProperties": {
-            "^score$": {"type": "integer"},
-        },
-    }
-
-
-@pytest.fixture
-def sample_definition_json_schema():
-    return {
-        "$defs": {
-            "Step": {
-                "properties": {
-                    "explanation": {"title": "Explanation", "type": "string"},
-                    "output": {"title": "Output", "type": "string"},
-                },
-                "required": ["explanation", "output"],
-                "title": "Step",
-                "type": "object",
-            }
-        },
-        "properties": {
-            "steps": {
-                "items": {"$ref": "#/$defs/Step"},
-                "title": "Steps",
-                "type": "array",
-            },
-            "final_answer": {"title": "Final Answer", "type": "string"},
-        },
-        "required": ["steps", "final_answer"],
-        "title": "MathReasoning",
-        "type": "object",
-        "additionalProperties": False,
-    }
-
-
-@pytest.fixture
-def sample_structured_outputs_choices():
-    return [
-        "Python",
-        "Java",
-        "JavaScript",
-        "C++",
-        "C#",
-        "PHP",
-        "TypeScript",
-        "Ruby",
-        "Swift",
-        "Kotlin",
-    ]
-
-
-@pytest.fixture
-def sample_sql_ebnf():
-    return """
-root ::= select_statement
-select_statement ::= "SELECT" column "from" table "where" condition
-column ::= "col_1" | "col_2"
-table ::= "table_1" | "table_2"
-condition ::= column "=" number
-number ::= "1" | "2"
-"""
-
-
-@pytest.fixture
-def sample_sql_lark():
-    return """
-start: select_statement
-select_statement: "SELECT" column "from" table "where" condition
-column: "col_1" | "col_2"
-table: "table_1" | "table_2"
-condition: column "=" number
-number: "1" | "2"
-"""
--- a/tests/v1/entrypoints/llm/__init__.py
+++ b/tests/v1/entrypoints/llm/__init__.py