# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import json
from http import HTTPStatus
from typing import Final

import pytest
import schemathesis
from httpx import URL
from hypothesis import settings
from schemathesis import GenerationConfig
from schemathesis.checks import not_a_server_error
from schemathesis.internal.checks import CheckContext
from schemathesis.models import Case
from schemathesis.transports.responses import GenericResponse

from ...utils import RemoteOpenAIServer

schemathesis.experimental.OPEN_API_3_1.enable()

MODEL_NAME = "HuggingFaceTB/SmolVLM-256M-Instruct"
MAXIMUM_IMAGES = 2
DEFAULT_TIMEOUT_SECONDS: Final[int] = 10
LONG_TIMEOUT_SECONDS: Final[int] = 60

# (METHOD, path) pairs that are given LONG_TIMEOUT_SECONDS instead of
# DEFAULT_TIMEOUT_SECONDS in test_openapi_stateless.
_LONG_TIMEOUT_ENDPOINTS = frozenset(
    {
        ("POST", "/v1/chat/completions"),
        ("POST", "/v1/completions"),
        ("POST", "/v1/messages"),
    }
)

# Weight transfer endpoints require special setup (weight_transfer_config)
# and are meant to be stateful, so the stateless test skips them.
_WEIGHT_TRANSFER_PATHS = (
    "/init_weight_transfer_engine",
    "/update_weights",
)

# Request paths for which a 501 response is accepted by
# customized_not_a_server_error.
_NOT_IMPLEMENTED_TOLERATED_PATHS = (
    "/v1/chat/completions/render",
    "/v1/chat/completions",
)


@pytest.fixture(scope="module")
def server():
    """Yield a module-scoped RemoteOpenAIServer running MODEL_NAME."""
    image_limit = json.dumps({"image": MAXIMUM_IMAGES})
    cli_args = [
        "--runner",
        "generate",
        "--max-model-len",
        "2048",
        "--max-num-seqs",
        "5",
        "--enforce-eager",
        "--trust-remote-code",
        "--limit-mm-per-prompt",
        image_limit,
    ]

    with RemoteOpenAIServer(MODEL_NAME, cli_args) as remote_server:
        yield remote_server


@pytest.fixture(scope="module")
def get_schema(server):
    """Load the OpenAPI schema served at ``<server.url_root>/openapi.json``."""
    # avoid generating null (\x00) bytes in strings during test case generation
    generation_config = GenerationConfig(allow_x00=False)
    schema_url = f"{server.url_root}/openapi.json"
    return schemathesis.openapi.from_uri(
        schema_url,
        generation_config=generation_config,
    )


# Schema handle built from the `get_schema` fixture; used below to
# parametrize the test.
schema = schemathesis.from_pytest_fixture("get_schema")


def _has_file_content_part(message: dict) -> bool:
    """Return True if the message's content list holds a `"type": "file"` item."""
    content = message.get("content", [])
    if not isinstance(content, list):
        return False
    return any(
        isinstance(part, dict) and part.get("type") == "file" for part in content
    )


def _has_invalid_tool_call(message: dict) -> bool:
    """Return True if any dict tool call is not a plain `"function"` call.

    A tool call is invalid when its "type" is anything other than
    "function", or when it carries a "custom" key.
    """
    tool_calls = message.get("tool_calls", [])
    if not isinstance(tool_calls, list):
        return False
    return any(
        isinstance(tool_call, dict)
        and (tool_call.get("type") != "function" or "custom" in tool_call)
        for tool_call in tool_calls
    )


@schemathesis.hook
def before_generate_case(context: schemathesis.hooks.HookContext, strategy):
    """Filter out generated cases that the checks below consider invalid."""
    op = context.operation
    assert op is not None

    def no_invalid_types(case: Case):
        """
        Return False for generated cases that must be skipped, True otherwise.

        Schemathesis produces some invalid data because the schema is
        permissive. A case is skipped when:

        1. It targets `POST /tokenize` and a message's content contains an
           item with `"type": "file"`, which isn't implemented.
        2. A message carries a tool call whose type is not `"function"`
           (e.g. `"type": "custom"`), or which has a `"custom"` key.
        3. `structured_outputs.grammar` is an empty string, which causes a
           server error in EBNF grammar parsing.

        Example test cases that are skipped:
        curl -X POST -H 'Content-Type: application/json' \
            -d '{"messages": [{"content": [{"file": {}, "type": "file"}], "role": "user"}]}' \
            http://localhost:8000/tokenize

        curl -X POST -H 'Content-Type: application/json' \
            -d '{"messages": [{"role": "assistant", "tool_calls": [{"custom": {"input": "", "name": ""}, "id": "", "type": "custom"}]}]}' \
            http://localhost:8000/v1/chat/completions
        """  # noqa: E501
        body = getattr(case, "body", None)
        if not isinstance(body, dict):
            # Nothing to inspect without a JSON object body.
            return True

        messages = body.get("messages")
        if isinstance(messages, list):
            for message in messages:
                if not isinstance(message, dict):
                    continue

                # Check for invalid file type in tokenize endpoint
                if (
                    op.method.lower() == "post"
                    and op.path == "/tokenize"
                    and _has_file_content_part(message)
                ):
                    return False

                # Check for invalid tool_calls with non-function types
                if _has_invalid_tool_call(message):
                    return False

        # Sometimes structured_outputs.grammar is generated to be empty
        # Causing a server error in EBNF grammar parsing
        # https://github.com/vllm-project/vllm/pull/22587#issuecomment-3195253421
        structured_outputs = body.get("structured_outputs", {})
        if (
            isinstance(structured_outputs, dict)
            and structured_outputs.get("grammar") == ""
        ):
            # A missing/None grammar is allowed (handled as no grammar);
            # only empty strings are skipped.
            return False

        return True

    return strategy.filter(no_invalid_types)


def customized_not_a_server_error(
    ctx: CheckContext, response: GenericResponse, case: Case
) -> bool | None:
    """Run `not_a_server_error`, but accept 501 from chat completion paths.

    The result of `not_a_server_error` is returned unchanged when it does
    not raise. If it raises, the exception is re-raised unless the request
    path is one of the chat completion paths and the response status is
    501 Not Implemented, in which case True is returned.
    """
    try:
        return not_a_server_error(ctx, response, case)
    except Exception:
        request_path = URL(response.request.url).path
        tolerated = (
            request_path in _NOT_IMPLEMENTED_TOLERATED_PATHS
            and response.status_code == HTTPStatus.NOT_IMPLEMENTED.value
        )
        if not tolerated:
            raise
        return True


@schema.parametrize()
@schema.override(headers={"Content-Type": "application/json"})
@settings(deadline=LONG_TIMEOUT_SECONDS * 1000, max_examples=50)
def test_openapi_stateless(case: Case):
    """Call each generated case and validate the response.

    Stateful endpoints are skipped. `customized_not_a_server_error` is used
    in place of the stock `not_a_server_error` check.
    """
    operation = case.operation
    method = operation.method.upper()
    path = operation.path

    if path.startswith("/v1/responses"):
        # Skip responses API as it is meant to be stateful.
        return

    if path in _WEIGHT_TRANSFER_PATHS:
        return

    # Generation endpoints require a longer timeout.
    if (method, path) in _LONG_TIMEOUT_ENDPOINTS:
        timeout = LONG_TIMEOUT_SECONDS
    else:
        timeout = DEFAULT_TIMEOUT_SECONDS

    # No need to verify SSL certificate for localhost
    case.call_and_validate(
        verify=False,
        timeout=timeout,
        additional_checks=(customized_not_a_server_error,),
        excluded_checks=(not_a_server_error,),
    )