[V0 Deprecation] Remove VLLM_USE_V1 from tests (#26341)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -103,7 +103,6 @@ def test_guided_decoding_deprecated():
     PARAMS_MODELS_BACKENDS_TOKENIZER_MODE,
 )
 def test_structured_output(
-    monkeypatch: pytest.MonkeyPatch,
     sample_json_schema: dict[str, Any],
     unsupported_json_schema: dict[str, Any],
     sample_sql_ebnf: str,
@@ -115,8 +114,6 @@ def test_structured_output(
     model_name: str,
     speculative_config: dict[str, Any],
 ):
-    monkeypatch.setenv("VLLM_USE_V1", "1")
-
     if current_platform.is_tpu() and speculative_config:
         pytest.skip("TPU does not support speculative decoding")
 
@@ -620,15 +617,12 @@ Make the response as short as possible.
     ],
 )
 def test_structured_output_with_reasoning_matrices(
-    monkeypatch: pytest.MonkeyPatch,
     backend: str,
     tokenizer_mode: TokenizerMode,
     reasoning_parser: str,
     model_name: str,
     speculative_config: dict[str, Any] | None,
 ):
-    monkeypatch.setenv("VLLM_USE_V1", "1")
-
     if current_platform.is_tpu() and speculative_config:
         pytest.skip("TPU does not support speculative decoding")
 
@@ -691,13 +685,10 @@ def test_structured_output_with_reasoning_matrices(
 @pytest.mark.skip_global_cleanup
 @pytest.mark.parametrize("model_name, tokenizer_mode", PARAMS_MODELS_TOKENIZER_MODE)
 def test_structured_output_auto_mode(
-    monkeypatch: pytest.MonkeyPatch,
     unsupported_json_schema: dict[str, Any],
     model_name: str,
     tokenizer_mode: str,
 ):
-    monkeypatch.setenv("VLLM_USE_V1", "1")
-
     llm = LLM(
         model=model_name,
         max_model_len=1024,
@@ -739,9 +730,7 @@ def test_structured_output_auto_mode(
 
 
 @pytest.mark.skip_global_cleanup
-def test_guidance_no_additional_properties(monkeypatch: pytest.MonkeyPatch):
-    monkeypatch.setenv("VLLM_USE_V1", "1")
-
+def test_guidance_no_additional_properties():
     llm = LLM(
         model="Qwen/Qwen2.5-1.5B-Instruct",
         max_model_len=1024,
@@ -801,12 +790,9 @@ def test_guidance_no_additional_properties(monkeypatch: pytest.MonkeyPatch):
 
 @pytest.mark.parametrize("backend", ["guidance", "xgrammar", "outlines"])
 def test_structured_output_batched_with_non_structured_outputs_requests(
-    monkeypatch: pytest.MonkeyPatch,
     sample_json_schema: dict[str, Any],
     backend: str,
 ):
-    monkeypatch.setenv("VLLM_USE_V1", "1")
-
     # Don't use eager execution on TPUs because we want to test for no
     # recompilation at runtime
     enforce_eager = bool(not current_platform.is_tpu())
Reference in New Issue
Block a user