# CI step group for the vLLM entrypoints test suites.
# All pytest paths below are relative to /vllm-workspace/tests.
# NOTE(review): block nesting (notably `depends_on` under `mirror.amd`) was
# rebuilt from a whitespace-collapsed copy of this file - confirm it against
# the schema of the tool that consumes it.
group: Entrypoints
depends_on:
  - image-build
steps:
  - label: Entrypoints Unit Tests
    timeout_in_minutes: 10
    working_dir: "/vllm-workspace/tests"
    source_file_dependencies:
      - vllm/entrypoints
      - tests/entrypoints/
    commands:
      - pytest -v -s entrypoints/openai/tool_parsers
      # Remaining entrypoints tests; most of the ignored paths are run by the
      # integration steps further down in this file.
      - >-
        pytest -v -s entrypoints/
        --ignore=entrypoints/llm
        --ignore=entrypoints/rpc
        --ignore=entrypoints/sleep
        --ignore=entrypoints/serve/instrumentator
        --ignore=entrypoints/openai
        --ignore=entrypoints/offline_mode
        --ignore=entrypoints/test_chat_utils.py
        --ignore=entrypoints/pooling

  - label: Entrypoints Integration (LLM)
    timeout_in_minutes: 40
    working_dir: "/vllm-workspace/tests"
    source_file_dependencies:
      - vllm/
      - tests/entrypoints/llm
      - tests/entrypoints/offline_mode
    commands:
      - export VLLM_WORKER_MULTIPROC_METHOD=spawn
      - >-
        pytest -v -s entrypoints/llm
        --ignore=entrypoints/llm/test_generate.py
        --ignore=entrypoints/llm/test_collective_rpc.py
      # test_generate.py needs a clean process, so it gets its own invocation.
      - pytest -v -s entrypoints/llm/test_generate.py
      # Run separately to avoid interference with other tests.
      - pytest -v -s entrypoints/offline_mode

  - label: Entrypoints Integration (API Server openai - Part 1)
    timeout_in_minutes: 50
    working_dir: "/vllm-workspace/tests"
    source_file_dependencies:
      - vllm/
      - tests/entrypoints/openai
      - tests/entrypoints/test_chat_utils
    commands:
      - export VLLM_WORKER_MULTIPROC_METHOD=spawn
      - >-
        pytest -v -s entrypoints/openai/chat_completion
        --ignore=entrypoints/openai/chat_completion/test_chat_with_tool_reasoning.py
        --ignore=entrypoints/openai/chat_completion/test_oot_registration.py
    mirror:
      amd:
        device: mi325_1
        depends_on:
          - image-build-amd

  - label: Entrypoints Integration (API Server openai - Part 2)
    timeout_in_minutes: 50
    working_dir: "/vllm-workspace/tests"
    source_file_dependencies:
      - vllm/
      - tests/entrypoints/openai
      - tests/entrypoints/test_chat_utils
    # NOTE(review): unlike Parts 1 and 3, this step does not export
    # VLLM_WORKER_MULTIPROC_METHOD=spawn - confirm that is intentional.
    commands:
      - >-
        pytest -v -s entrypoints/openai/completion
        --ignore=entrypoints/openai/completion/test_tensorizer_entrypoint.py
      - pytest -v -s entrypoints/openai/speech_to_text/
      - pytest -v -s entrypoints/test_chat_utils.py
    mirror:
      amd:
        device: mi325_1
        depends_on:
          - image-build-amd

  - label: Entrypoints Integration (API Server openai - Part 3)
    timeout_in_minutes: 50
    working_dir: "/vllm-workspace/tests"
    source_file_dependencies:
      - vllm/
      - tests/entrypoints/openai
      - tests/entrypoints/test_chat_utils
    commands:
      - export VLLM_WORKER_MULTIPROC_METHOD=spawn
      # Everything under entrypoints/openai except the listed paths.
      - >-
        pytest -v -s entrypoints/openai
        --ignore=entrypoints/openai/chat_completion
        --ignore=entrypoints/openai/completion
        --ignore=entrypoints/openai/speech_to_text/
        --ignore=entrypoints/openai/correctness/
        --ignore=entrypoints/openai/tool_parsers/
        --ignore=entrypoints/openai/responses
        --ignore=entrypoints/openai/test_multi_api_servers.py

  - label: Entrypoints Integration (API Server 2)
    timeout_in_minutes: 130
    working_dir: "/vllm-workspace/tests"
    source_file_dependencies:
      - vllm/
      - tests/entrypoints/rpc
      - tests/entrypoints/serve/instrumentator
      - tests/tool_use
    commands:
      - export VLLM_WORKER_MULTIPROC_METHOD=spawn
      - pytest -v -s entrypoints/serve/instrumentator
      - PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc
      - pytest -v -s tool_use

  - label: Entrypoints Integration (Pooling)
    timeout_in_minutes: 50
    working_dir: "/vllm-workspace/tests"
    source_file_dependencies:
      - vllm/
      - tests/entrypoints/pooling
    commands:
      - export VLLM_WORKER_MULTIPROC_METHOD=spawn
      - pytest -v -s entrypoints/pooling

  - label: Entrypoints Integration (Responses API)
    timeout_in_minutes: 50
    working_dir: "/vllm-workspace/tests"
    source_file_dependencies:
      - vllm/
      - tests/entrypoints/openai/responses
    commands:
      - pytest -v -s entrypoints/openai/responses

  - label: OpenAI API Correctness
    timeout_in_minutes: 30
    # Pinned explicitly, like every other step in this group: the pytest path
    # below is relative to the tests root.
    working_dir: "/vllm-workspace/tests"
    source_file_dependencies:
      - csrc/
      - vllm/entrypoints/openai/
      - vllm/model_executor/models/whisper.py
    commands:
      # LMEval + transcription WER check.
      - pytest -s entrypoints/openai/correctness/