# Pipeline group "Intel": one image-build step followed by two test steps
# that both wait on it (via depends_on: image-build-xpu).
group: Intel
steps:
  # Step 1: invoke the XPU image build script with the registry, the
  # repository and the current commit as positional arguments.
  # soft_fail is enabled for this step.
  - label: ":docker: Build XPU image"
    soft_fail: true
    depends_on: []
    key: image-build-xpu
    commands:
      - bash -lc '.buildkite/image_build/image_build_xpu.sh "public.ecr.aws/q9t5s3a7" "vllm-ci-test-repo" "$BUILDKITE_COMMIT"'
    env:
      # Quoted so the value stays a string rather than an integer.
      DOCKER_BUILDKIT: "1"
    retry:
      # Automatic retry rules: at most 2 retries per listed exit status.
      automatic:
        - exit_status: -1  # Agent was lost
          limit: 2
        - exit_status: -10  # Also labelled "Agent was lost" -- TODO confirm
          limit: 2

  # Step 2: run the offline-inference example script several times with
  # different models and flags, chained with && inside one quoted argument
  # handed to run-intel-test.sh.
  # NOTE(review): device, no_plugin and source_file_dependencies are
  # presumably read by the tool that renders this file into a pipeline --
  # confirm against that tool.
  - label: "XPU example Test"
    depends_on:
      - image-build-xpu
    timeout_in_minutes: 30
    device: intel_gpu
    no_plugin: true
    env:
      # Same registry/repository strings that the build step passes to
      # image_build_xpu.sh.
      REGISTRY: "public.ecr.aws/q9t5s3a7"
      REPO: "vllm-ci-test-repo"
    source_file_dependencies:
      - vllm/
      - .buildkite/intel_jobs/test-intel.yaml
    commands:
      # Folded scalar (>-): the lines below are joined with single spaces
      # into one command string with no trailing newline. Keep every line
      # at the same indentation, otherwise newlines are preserved.
      - >-
        bash .buildkite/scripts/hardware_ci/run-intel-test.sh
        'pip install tblib==3.1.0 &&
        python3 examples/basic/offline_inference/generate.py
        --model facebook/opt-125m --block-size 64 --enforce-eager &&
        python3 examples/basic/offline_inference/generate.py
        --model facebook/opt-125m --block-size 64 -O3 -cc.cudagraph_mode=NONE &&
        python3 examples/basic/offline_inference/generate.py
        --model facebook/opt-125m --block-size 64 --enforce-eager -tp 2
        --distributed-executor-backend mp &&
        python3 examples/basic/offline_inference/generate.py
        --model facebook/opt-125m --block-size 64 --enforce-eager
        --attention-backend=TRITON_ATTN &&
        python3 examples/basic/offline_inference/generate.py
        --model facebook/opt-125m --block-size 64 --enforce-eager
        --quantization fp8 &&
        python3 examples/basic/offline_inference/generate.py
        --model superjob/Qwen3-4B-Instruct-2507-GPTQ-Int4 --block-size 64
        --enforce-eager --max-model-len 8192 &&
        python3 examples/basic/offline_inference/generate.py
        --model ibm-research/PowerMoE-3b --block-size 64 --enforce-eager -tp 2 &&
        python3 examples/basic/offline_inference/generate.py
        --model ibm-research/PowerMoE-3b --block-size 64 --enforce-eager -tp 2
        --enable-expert-parallel'

  # Step 3: change into tests/ and run a chain of pytest invocations over
  # v1 sub-directories, each with its own --ignore list, all passed as one
  # quoted argument to run-intel-test.sh.
  # NOTE(review): device, no_plugin and source_file_dependencies are
  # presumably read by the tool that renders this file into a pipeline --
  # confirm against that tool.
  - label: "XPU V1 test"
    depends_on:
      - image-build-xpu
    timeout_in_minutes: 30
    device: intel_gpu
    no_plugin: true
    env:
      # Same registry/repository strings that the build step passes to
      # image_build_xpu.sh.
      REGISTRY: "public.ecr.aws/q9t5s3a7"
      REPO: "vllm-ci-test-repo"
    source_file_dependencies:
      - vllm/
      - .buildkite/intel_jobs/test-intel.yaml
    commands:
      # Folded scalar (>-): the lines below are joined with single spaces
      # into one command string with no trailing newline. Keep every line
      # at the same indentation, otherwise newlines are preserved.
      - >-
        bash .buildkite/scripts/hardware_ci/run-intel-test.sh
        'cd tests &&
        pytest -v -s v1/core
        --ignore=v1/core/test_reset_prefix_cache_e2e.py
        --ignore=v1/core/test_scheduler_e2e.py &&
        pytest -v -s v1/engine
        --ignore=v1/engine/test_output_processor.py &&
        pytest -v -s v1/sample
        --ignore=v1/sample/test_logprobs.py
        --ignore=v1/sample/test_logprobs_e2e.py
        -k "not test_topk_only and not test_topp_only and not test_topk_and_topp" &&
        pytest -v -s v1/worker
        --ignore=v1/worker/test_gpu_model_runner.py
        --ignore=v1/worker/test_worker_memory_snapshot.py &&
        pytest -v -s v1/structured_output &&
        pytest -v -s v1/test_serial_utils.py &&
        pytest -v -s v1/spec_decode
        --ignore=v1/spec_decode/test_max_len.py
        --ignore=v1/spec_decode/test_tree_attention.py
        --ignore=v1/spec_decode/test_speculators_eagle3.py
        --ignore=v1/spec_decode/test_acceptance_length.py &&
        pytest -v -s v1/kv_connector/unit
        --ignore=v1/kv_connector/unit/test_multi_connector.py
        --ignore=v1/kv_connector/unit/test_example_connector.py
        --ignore=v1/kv_connector/unit/test_lmcache_integration.py
        --ignore=v1/kv_connector/unit/test_hf3fs_client.py
        --ignore=v1/kv_connector/unit/test_hf3fs_connector.py
        --ignore=v1/kv_connector/unit/test_hf3fs_metadata_server.py'