diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml index 49987880c..b7254efd2 100644 --- a/.buildkite/test-amd.yaml +++ b/.buildkite/test-amd.yaml @@ -1801,6 +1801,19 @@ steps: - tests/v1/e2e commands: - pytest -v -s v1/e2e/spec_decode/test_spec_decode.py -k "eagle_correctness_heavy" + + +- label: V1 e2e (4xH100-4xMI325) # TBD + timeout_in_minutes: 180 + mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325] + agent_pool: mi325_4 + optional: true + source_file_dependencies: + - vllm/v1/attention/backends/utils.py + - vllm/v1/worker/gpu_model_runner.py + - tests/v1/e2e/test_hybrid_chunked_prefill.py + commands: + - pytest -v -s v1/e2e/test_hybrid_chunked_prefill.py - label: V1 Spec Decode # TBD diff --git a/tests/v1/e2e/test_hybrid_chunked_prefill.py b/tests/v1/e2e/test_hybrid_chunked_prefill.py index 1790343ca..dd8a5f5cb 100644 --- a/tests/v1/e2e/test_hybrid_chunked_prefill.py +++ b/tests/v1/e2e/test_hybrid_chunked_prefill.py @@ -36,14 +36,20 @@ MESSAGES = [ ] -@pytest.mark.skipif(not current_platform.is_cuda(), reason="CUDA not available") @pytest.mark.parametrize( "model_name", [ pytest.param("Qwen/Qwen3.5-4B", marks=[large_gpu_mark(min_gb=40)]), pytest.param( "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-FP8", - marks=[large_gpu_mark(min_gb=80)] + multi_gpu_marks(num_gpus=4), + marks=[large_gpu_mark(min_gb=80)] + + multi_gpu_marks(num_gpus=4) + + [ + pytest.mark.skipif( + not current_platform.is_cuda(), + reason="modelopt quantization is supported only on CUDA", + ) + ], ), ], )