[Refactor] Relocate tests from tests/v1/entrypoints/ to tests/entrypoints/ (#37500)

Signed-off-by: sfeng33 <4florafeng@gmail.com>
This commit is contained in:
Flora Feng
2026-03-20 05:50:34 -04:00
committed by GitHub
parent 6050b93bed
commit b4c1aef21c
16 changed files with 171 additions and 235 deletions

View File

@@ -127,7 +127,7 @@ run_and_track_test() {
# --- Actual Test Execution ---
run_and_track_test 1 "test_struct_output_generate.py" \
"python3 -m pytest -s -v /workspace/vllm/tests/v1/entrypoints/llm/test_struct_output_generate.py -k \"not test_structured_output_with_reasoning_matrices\""
"python3 -m pytest -s -v /workspace/vllm/tests/entrypoints/llm/test_struct_output_generate.py -k \"not test_structured_output_with_reasoning_matrices\""
run_and_track_test 2 "test_moe_pallas.py" \
"python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_moe_pallas.py"
run_and_track_test 3 "test_lora.py" \

View File

@@ -483,19 +483,6 @@ steps:
- pytest -v -s v1/e2e/spec_decode/test_spec_decode.py -k "tensor_parallelism"
- label: Entrypoints V1 # TBD
timeout_in_minutes: 180
mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
agent_pool: mi250_1
optional: true
working_dir: "/vllm-workspace/tests"
source_file_dependencies:
- vllm/
- tests/v1
commands:
- pytest -v -s v1/entrypoints
- label: V1 Sample + Logits # TBD
timeout_in_minutes: 60
mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
@@ -1173,14 +1160,14 @@ steps:
- vllm/v1/engine/
- vllm/v1/worker/
- tests/v1/distributed
- tests/v1/entrypoints/openai/test_multi_api_servers.py
- tests/entrypoints/openai/test_multi_api_servers.py
- vllm/platforms/rocm.py
commands:
- export TORCH_NCCL_BLOCKING_WAIT=1
- TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_async_llm_dp.py
- TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_eagle_dp.py
- TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_external_lb_dp.py
- DP_SIZE=2 pytest -v -s v1/entrypoints/openai/test_multi_api_servers.py
- DP_SIZE=2 pytest -v -s entrypoints/openai/test_multi_api_servers.py
- label: Distributed Compile + RPC Tests (2 GPUs) # TBD
@@ -1766,19 +1753,6 @@ steps:
- pytest -v -s v1/e2e/spec_decode/test_spec_decode.py -k "eagle_correctness_heavy"
- label: Entrypoints V1 # 25.7m
timeout_in_minutes: 45
mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
agent_pool: mi325_1
optional: true
working_dir: "/vllm-workspace/tests"
source_file_dependencies:
- vllm/
- tests/v1
commands:
- pytest -v -s v1/entrypoints
- label: V1 Spec Decode # TBD
timeout_in_minutes: 40
mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
@@ -2391,14 +2365,14 @@ steps:
- vllm/v1/engine/
- vllm/v1/worker/
- tests/v1/distributed
- tests/v1/entrypoints/openai/test_multi_api_servers.py
- tests/entrypoints/openai/test_multi_api_servers.py
- vllm/platforms/rocm.py
commands:
- export TORCH_NCCL_BLOCKING_WAIT=1
- TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_async_llm_dp.py
- TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_eagle_dp.py
- TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_external_lb_dp.py
- DP_SIZE=2 pytest -v -s v1/entrypoints/openai/test_multi_api_servers.py
- DP_SIZE=2 pytest -v -s entrypoints/openai/test_multi_api_servers.py
- label: Distributed Compile + RPC Tests (2 GPUs) # 56.1m

View File

@@ -27,14 +27,14 @@ steps:
- vllm/v1/engine/
- vllm/v1/worker/
- tests/v1/distributed
- tests/v1/entrypoints/openai/test_multi_api_servers.py
- tests/entrypoints/openai/test_multi_api_servers.py
commands:
# https://github.com/NVIDIA/nccl/issues/1838
- export NCCL_CUMEM_HOST_ENABLE=0
- TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_async_llm_dp.py
- TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_eagle_dp.py
- TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_external_lb_dp.py
- DP_SIZE=2 pytest -v -s v1/entrypoints/openai/test_multi_api_servers.py
- DP_SIZE=2 pytest -v -s entrypoints/openai/test_multi_api_servers.py
- label: Distributed Compile + RPC Tests (2 GPUs)
timeout_in_minutes: 20

View File

@@ -34,7 +34,7 @@ steps:
- tests/entrypoints/test_chat_utils
commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -v -s entrypoints/openai --ignore=entrypoints/openai/chat_completion/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/chat_completion/test_oot_registration.py --ignore=entrypoints/openai/completion/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses
- pytest -v -s entrypoints/openai --ignore=entrypoints/openai/chat_completion/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/chat_completion/test_oot_registration.py --ignore=entrypoints/openai/completion/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses --ignore=entrypoints/openai/test_multi_api_servers.py
- pytest -v -s entrypoints/test_chat_utils.py
mirror:
amd:
@@ -75,19 +75,6 @@ steps:
commands:
- pytest -v -s entrypoints/openai/responses
- label: Entrypoints V1
timeout_in_minutes: 50
source_file_dependencies:
- vllm/
- tests/v1
commands:
- pytest -v -s v1/entrypoints
mirror:
amd:
device: mi325_1
depends_on:
- image-build-amd
- label: OpenAI API Correctness
timeout_in_minutes: 30
source_file_dependencies:

View File

@@ -11,7 +11,7 @@ steps:
- vllm/v1/attention/
- tests/v1/engine/test_llm_engine.py
- tests/v1/e2e/
- tests/v1/entrypoints/llm/test_struct_output_generate.py
- tests/entrypoints/llm/test_struct_output_generate.py
commands:
- set -x
- export VLLM_USE_V2_MODEL_RUNNER=1
@@ -22,7 +22,7 @@ steps:
- pytest -v -s v1/e2e/general/test_context_length.py
- pytest -v -s v1/e2e/general/test_min_tokens.py
# Temporary hack filter to exclude ngram spec decoding based tests.
- pytest -v -s v1/entrypoints/llm/test_struct_output_generate.py -k "xgrammar and not speculative_config6 and not speculative_config7 and not speculative_config8 and not speculative_config0"
- pytest -v -s entrypoints/llm/test_struct_output_generate.py -k "xgrammar and not speculative_config6 and not speculative_config7 and not speculative_config8 and not speculative_config0"
- label: Model Runner V2 Examples
timeout_in_minutes: 45

2
.github/CODEOWNERS vendored
View File

@@ -75,7 +75,7 @@ CMakeLists.txt @tlrmchlsmth @LucasWilkinson
/tests/multimodal @DarkLight1337 @ywang96 @NickLucche
/tests/quantization @mgoin @robertgshaw2-redhat @yewentao256 @pavanimajety
/tests/test_inputs.py @DarkLight1337 @ywang96
/tests/v1/entrypoints/llm/test_struct_output_generate.py @mgoin @russellb @aarnphm
/tests/entrypoints/llm/test_struct_output_generate.py @mgoin @russellb @aarnphm
/tests/v1/structured_output @mgoin @russellb @aarnphm
/tests/v1/core @WoosukKwon @robertgshaw2-redhat @njhill @ywang96 @alexm-redhat @heheda12345 @ApostaC @orozery
/tests/weight_loading @mgoin @youkaichao @yewentao256

2
.github/mergify.yml vendored
View File

@@ -260,7 +260,7 @@ pull_request_rules:
- files=examples/offline_inference/structured_outputs.py
- files=examples/online_serving/structured_outputs/structured_outputs.py
- files~=^tests/v1/structured_output/
- files=tests/v1/entrypoints/llm/test_struct_output_generate.py
- files=tests/entrypoints/llm/test_struct_output_generate.py
- files~=^vllm/v1/structured_output/
actions:
label:

View File

@@ -6,9 +6,6 @@ from copy import deepcopy
from tblib import pickling_support
# Import fixture
from tests.v1.entrypoints.conftest import sample_json_schema # noqa
# ruff: noqa
# Install support for pickling exceptions so that we can nicely propagate
@@ -81,6 +78,55 @@ if TYPE_CHECKING:
logger = init_logger(__name__)
@pytest.fixture
def sample_json_schema():
    """Return a JSON schema describing a person-like record.

    The top-level object requires ``name``, ``age``, ``skills``, ``grade``,
    ``email`` and ``work_history`` and rejects additional properties. The
    dict is rebuilt on every invocation of this function.
    """
    # Schema of a single ``work_history`` element; all three keys are required.
    job_entry = {
        "type": "object",
        "properties": {
            "company": {"type": "string"},
            "duration": {
                "type": "number",
                "minimum": 0.0,
                "maximum": 100.0,
            },
            "position": {"type": "string"},
        },
        "required": ["company", "duration", "position"],
        "additionalProperties": False,
    }
    fields = {
        "name": {"type": "string"},
        "age": {"type": "integer"},
        "skills": {
            "type": "array",
            "items": {
                "type": "string",
            },
        },
        "grade": {
            "type": "string",
            "pattern": "^[A-D]$",
        },
        "email": {
            "type": "string",
            "pattern": "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$",
        },
        "work_history": {
            "type": "array",
            "items": job_entry,
            "minItems": 0,
            "maxItems": 3,
        },
    }
    return {
        "type": "object",
        "properties": fields,
        "required": ["name", "age", "skills", "grade", "email", "work_history"],
        "additionalProperties": False,
        "minProperties": 1,
        "maxProperties": 10,
    }
_TEST_DIR = os.path.dirname(__file__)
_TEST_PROMPTS = [os.path.join(_TEST_DIR, "prompts", "example.txt")]
_LONG_PROMPTS = [os.path.join(_TEST_DIR, "prompts", "summary.txt")]

View File

@@ -24,6 +24,108 @@ from vllm.sampling_params import (
StructuredOutputsParams,
)
# Dotted-quad pattern: three "<0-255>." groups followed by a final <0-255>.
SAMPLE_REGEX = "".join(
    [
        r"((25[0-5]|(2[0-4]|1\d|[1-9]|)\d)\.){3}",
        r"(25[0-5]|(2[0-4]|1\d|[1-9]|)\d)",
    ]
)
# Schema of one ``work_history`` element; every key is required.
_WORK_HISTORY_ITEM_SCHEMA = {
    "type": "object",
    "properties": {
        "company": {"type": "string"},
        "duration": {
            "type": "number",
            "minimum": 0.0,
            "maximum": 100.0,  # Numeric range
        },
        "position": {"type": "string"},
    },
    "required": ["company", "duration", "position"],
    "additionalProperties": False,
}

# Note: Ensure this only uses attributes compatible with xgrammar
SAMPLE_JSON_SCHEMA = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "age": {"type": "integer"},
        "skills": {
            "type": "array",
            "items": {
                "type": "string",
            },
        },
        "grade": {
            "type": "string",
            "pattern": "^[A-D]$",  # Regex pattern
        },
        "email": {
            "type": "string",
            "pattern": "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$",
        },
        "work_history": {
            "type": "array",
            "items": _WORK_HISTORY_ITEM_SCHEMA,
            "minItems": 0,
            "maxItems": 3,
        },
    },
    "required": ["name", "age", "skills", "grade", "email", "work_history"],
    "additionalProperties": False,
    "minProperties": 1,
    "maxProperties": 10,
}
_UNSUPPORTED_SCORE_SCHEMA = {
    "type": "integer",
    "multipleOf": 5,  # Numeric multiple
}
_UNSUPPORTED_TAGS_SCHEMA = {
    "type": "array",
    "items": {"type": "string", "minLength": 10, "maxLength": 20},
}

# A schema unsupported by xgrammar
UNSUPPORTED_JSON_SCHEMA = {
    "type": "object",
    "properties": {
        "score": _UNSUPPORTED_SCORE_SCHEMA,
        "tags": _UNSUPPORTED_TAGS_SCHEMA,
    },
    "required": ["score", "tags"],
    "additionalProperties": False,
    "patternProperties": {
        "^score$": {"type": "integer"},
    },
}
# Programming-language names used as the sample list of allowed choices.
# fmt: off
SAMPLE_STRUCTURED_OUTPUTS_CHOICES = [
    "Python", "Java", "JavaScript", "C++", "C#",
    "PHP", "TypeScript", "Ruby", "Swift", "Kotlin",
]
# fmt: on
# EBNF-style grammar (``::=`` rules, entry rule ``root``) for a tiny SELECT
# statement: one of two columns, one of two tables, and a ``column = number``
# condition where number is "1" or "2". The literal starts and ends with a
# newline; its exact text is the payload, so do not reformat it.
SAMPLE_SQL_EBNF = """
root ::= select_statement
select_statement ::= "SELECT" column "from" table "where" condition
column ::= "col_1" | "col_2"
table ::= "table_1" | "table_2"
condition ::= column "=" number
number ::= "1" | "2"
"""
# Lark-style grammar (``name: ...`` rules, entry rule ``start``) describing the
# same tiny SELECT statement as the EBNF variant: one of two columns, one of
# two tables, and a ``column = number`` condition where number is "1" or "2".
# The literal's exact text is the payload, so do not reformat it.
SAMPLE_SQL_LARK = """
start: select_statement
select_statement: "SELECT" column "from" table "where" condition
column: "col_1" | "col_2"
table: "table_1" | "table_2"
condition: column "=" number
number: "1" | "2"
"""
NGRAM_SPEC_CONFIG = {
"model": "[ngram]",
"num_speculative_tokens": 5,
@@ -110,17 +212,17 @@ class CarDescription(BaseModel):
PARAMS_MODELS_BACKENDS_TOKENIZER_MODE,
)
def test_structured_output(
sample_json_schema: dict[str, Any],
unsupported_json_schema: dict[str, Any],
sample_sql_ebnf: str,
sample_sql_lark: str,
sample_regex: str,
sample_structured_outputs_choices: str,
backend: str,
tokenizer_mode: str,
model_name: str,
speculative_config: dict[str, Any],
):
sample_json_schema = SAMPLE_JSON_SCHEMA
unsupported_json_schema = UNSUPPORTED_JSON_SCHEMA
sample_sql_ebnf = SAMPLE_SQL_EBNF
sample_sql_lark = SAMPLE_SQL_LARK
sample_regex = SAMPLE_REGEX
sample_structured_outputs_choices = SAMPLE_STRUCTURED_OUTPUTS_CHOICES
if current_platform.is_tpu() and speculative_config:
pytest.skip("TPU does not support speculative decoding")
@@ -702,10 +804,10 @@ def test_structured_output_with_reasoning_matrices(
@pytest.mark.parametrize("model_name, tokenizer_mode", PARAMS_MODELS_TOKENIZER_MODE)
def test_structured_output_auto_mode(
unsupported_json_schema: dict[str, Any],
model_name: str,
tokenizer_mode: str,
):
unsupported_json_schema = UNSUPPORTED_JSON_SCHEMA
llm = LLM(
model=model_name,
max_model_len=1024,
@@ -808,9 +910,9 @@ def test_guidance_no_additional_properties():
@pytest.mark.parametrize("backend", ["guidance", "xgrammar", "outlines"])
def test_structured_output_batched_with_non_structured_outputs_requests(
sample_json_schema: dict[str, Any],
backend: str,
):
sample_json_schema = SAMPLE_JSON_SCHEMA
# Don't use eager execution on TPUs because we want to test for no
# recompilation at runtime
enforce_eager = bool(not current_platform.is_tpu())

View File

@@ -1,173 +0,0 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
@pytest.fixture
def sample_prompts():
    """Return four short English sentence openers to use as text prompts."""
    prompts = [
        "Hello, my name is",
        "The president of the United States is",
        "The capital of France is",
        "The future of AI is",
    ]
    return prompts
@pytest.fixture
def sample_token_ids():
    """Return four token-id lists of increasing length (1 through 4 ids)."""
    token_id_lists = [
        [0],
        [0, 1],
        [0, 2, 1],
        [0, 3, 1, 2],
    ]
    return token_id_lists
@pytest.fixture
def sample_regex():
    """Return a dotted-quad regex: three "<0-255>." groups plus a final <0-255>."""
    leading_octets = r"((25[0-5]|(2[0-4]|1\d|[1-9]|)\d)\.){3}"
    last_octet = r"(25[0-5]|(2[0-4]|1\d|[1-9]|)\d)"
    return leading_octets + last_octet
# Note: Ensure this only uses attributes compatible with xgrammar
@pytest.fixture
def sample_json_schema():
    """Return a JSON schema describing a person-like record.

    The top-level object requires ``name``, ``age``, ``skills``, ``grade``,
    ``email`` and ``work_history`` and rejects additional properties. The
    dict is rebuilt on every invocation of this function.
    """
    # Schema of a single ``work_history`` element; all three keys are required.
    job_entry = {
        "type": "object",
        "properties": {
            "company": {"type": "string"},
            "duration": {
                "type": "number",
                "minimum": 0.0,
                "maximum": 100.0,  # Numeric range
            },
            "position": {"type": "string"},
        },
        "required": ["company", "duration", "position"],
        "additionalProperties": False,
    }
    fields = {
        "name": {"type": "string"},
        "age": {"type": "integer"},
        "skills": {
            "type": "array",
            "items": {
                "type": "string",
            },
        },
        "grade": {
            "type": "string",
            "pattern": "^[A-D]$",  # Regex pattern
        },
        "email": {
            "type": "string",
            "pattern": "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$",
        },
        "work_history": {
            "type": "array",
            "items": job_entry,
            "minItems": 0,
            "maxItems": 3,
        },
    }
    return {
        "type": "object",
        "properties": fields,
        "required": ["name", "age", "skills", "grade", "email", "work_history"],
        "additionalProperties": False,
        "minProperties": 1,
        "maxProperties": 10,
    }
# A schema unsupported by xgrammar
@pytest.fixture
def unsupported_json_schema():
    """Return an object schema with required ``score`` and ``tags`` fields.

    It uses ``multipleOf``, string ``minLength``/``maxLength`` and
    ``patternProperties``.
    """
    score_schema = {
        "type": "integer",
        "multipleOf": 5,  # Numeric multiple
    }
    tags_schema = {
        "type": "array",
        "items": {"type": "string", "minLength": 10, "maxLength": 20},
    }
    return {
        "type": "object",
        "properties": {
            "score": score_schema,
            "tags": tags_schema,
        },
        "required": ["score", "tags"],
        "additionalProperties": False,
        "patternProperties": {
            "^score$": {"type": "integer"},
        },
    }
@pytest.fixture
def sample_definition_json_schema():
    """Return a ``MathReasoning`` schema that references a ``$defs`` entry.

    ``steps`` is an array whose items point at ``#/$defs/Step`` (an object
    with required ``explanation`` and ``output`` strings); ``final_answer``
    is a string. Both top-level fields are required.
    """
    # Reusable definition referenced through "$ref" below.
    step_definition = {
        "properties": {
            "explanation": {"title": "Explanation", "type": "string"},
            "output": {"title": "Output", "type": "string"},
        },
        "required": ["explanation", "output"],
        "title": "Step",
        "type": "object",
    }
    top_level_fields = {
        "steps": {
            "items": {"$ref": "#/$defs/Step"},
            "title": "Steps",
            "type": "array",
        },
        "final_answer": {"title": "Final Answer", "type": "string"},
    }
    return {
        "$defs": {"Step": step_definition},
        "properties": top_level_fields,
        "required": ["steps", "final_answer"],
        "title": "MathReasoning",
        "type": "object",
        "additionalProperties": False,
    }
@pytest.fixture
def sample_structured_outputs_choices():
    """Return ten programming-language names as the sample choice list."""
    # fmt: off
    choices = [
        "Python", "Java", "JavaScript", "C++", "C#",
        "PHP", "TypeScript", "Ruby", "Swift", "Kotlin",
    ]
    # fmt: on
    return choices
@pytest.fixture
def sample_sql_ebnf():
    """Return an EBNF-style grammar (entry rule ``root``) for a tiny SELECT.

    The statement picks one of two columns from one of two tables, with a
    ``column = number`` condition where number is "1" or "2".
    """
    # The literal's exact text (including the leading newline) is the payload.
    grammar = """
root ::= select_statement
select_statement ::= "SELECT" column "from" table "where" condition
column ::= "col_1" | "col_2"
table ::= "table_1" | "table_2"
condition ::= column "=" number
number ::= "1" | "2"
"""
    return grammar
@pytest.fixture
def sample_sql_lark():
    """Return a Lark-style grammar (entry rule ``start``) for a tiny SELECT.

    The statement picks one of two columns from one of two tables, with a
    ``column = number`` condition where number is "1" or "2".
    """
    # The literal's exact text (including the leading newline) is the payload.
    grammar = """
start: select_statement
select_statement: "SELECT" column "from" table "where" condition
column: "col_1" | "col_2"
table: "table_1" | "table_2"
condition: column "=" number
number: "1" | "2"
"""
    return grammar