[Hardware][Intel GPU] Add v1 Intel GPU support with Flash attention backend. (#19560)

Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
2025-06-27 00:27:18 +08:00
parent 0bceac9810
commit b69781f107
10 changed files with 393 additions and 42 deletions
--- a/.buildkite/scripts/hardware_ci/run-xpu-test.sh
+++ b/.buildkite/scripts/hardware_ci/run-xpu-test.sh
@@ -28,4 +28,5 @@ docker run \
    sh -c '
    VLLM_USE_V1=0 python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m
    VLLM_USE_V1=0 python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m -tp 2
+    VLLM_USE_V1=1 python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m --block-size 64 --enforce-eager
 '