group: Engine
depends_on:
  - image-build
steps:
  - label: Engine
    timeout_in_minutes: 15
    source_file_dependencies:
      - vllm/
      - tests/engine
      - tests/test_sequence
      - tests/test_config
      - tests/test_logger
      - tests/test_vllm_port
    commands:
      - pytest -v -s engine test_sequence.py test_config.py test_logger.py test_vllm_port.py

  - label: V1 e2e + engine (1 GPU)
    timeout_in_minutes: 45
    source_file_dependencies:
      - vllm/
      - tests/v1
    commands:
      # TODO: accuracy does not match, whether setting
      # VLLM_USE_FLASHINFER_SAMPLER or not on H100.
      - pytest -v -s v1/e2e
      # Run this test standalone for now;
      # need to untangle the (implicit) use of spawn/fork across the tests.
      - pytest -v -s v1/engine/test_preprocess_error_handling.py
      # Run the rest of the v1/engine tests.
      - pytest -v -s v1/engine --ignore v1/engine/test_preprocess_error_handling.py
    mirror:
      amd:
        device: mi325_1
        depends_on:
          - image-build-amd
        commands:
          - pytest -v -s v1/e2e
          - pytest -v -s v1/engine

  - label: V1 e2e (2 GPUs)
    # TODO: Fix timeout after we have more confidence in the test stability
    timeout_in_minutes: 60
    optional: true
    num_devices: 2
    source_file_dependencies:
      - vllm/
      - tests/v1/e2e
    commands:
      # Only run tests that need exactly 2 GPUs
      - pytest -v -s v1/e2e/test_spec_decode.py -k "tensor_parallelism"
    mirror:
      amd:
        device: mi325_2
        depends_on:
          - image-build-amd

  - label: V1 e2e (4 GPUs)
    # TODO: Fix timeout after we have more confidence in the test stability
    timeout_in_minutes: 60
    optional: true
    num_devices: 4
    source_file_dependencies:
      - vllm/
      - tests/v1/e2e
    commands:
      # Only run tests that need 4 GPUs
      - pytest -v -s v1/e2e/test_spec_decode.py -k "eagle_correctness_heavy"
    mirror:
      amd:
        device: mi325_4
        depends_on:
          - image-build-amd