diff --git a/.buildkite/test_areas/misc.yaml b/.buildkite/test_areas/misc.yaml
index f58aa204b..c6b43b97a 100644
--- a/.buildkite/test_areas/misc.yaml
+++ b/.buildkite/test_areas/misc.yaml
@@ -147,6 +147,19 @@ steps:
     - pytest -v -s transformers_utils
     - pytest -v -s config
 
+- label: GPT-OSS Eval (H100)
+  timeout_in_minutes: 60
+  working_dir: "/vllm-workspace/"
+  device: h100
+  optional: true
+  source_file_dependencies:
+    - tests/evals/gpt_oss
+    - vllm/model_executor/models/gpt_oss.py
+    - vllm/model_executor/layers/quantization/mxfp4.py
+  commands:
+    - uv pip install --system 'gpt-oss[eval]==0.0.5'
+    - pytest -s -v tests/evals/gpt_oss/test_gpqa_correctness.py --model openai/gpt-oss-20b --metric 0.58
+
 - label: GPT-OSS Eval (B200)
   timeout_in_minutes: 60
   working_dir: "/vllm-workspace/"