Revert "[V0 deprecation] Remove V0 CPU/XPU/TPU backends (#20412)"

This reverts commit e202dd2736.
2025-07-06 14:02:36 -07:00
parent c18b3b8e8b
commit a5dd03c1eb
20 changed files with 5034 additions and 46 deletions
--- a/.buildkite/scripts/hardware_ci/run-cpu-test.sh
+++ b/.buildkite/scripts/hardware_ci/run-cpu-test.sh
@@ -66,10 +66,10 @@ function cpu_tests() {
    tests/quantization/test_compressed_tensors.py::test_compressed_tensors_w8a8_dynamic_per_token"

  # Run AWQ test
-  # docker exec cpu-test-"$NUMA_NODE" bash -c "
-  #   set -e
-  #   VLLM_USE_V1=0 pytest -s -v \
-  #   tests/quantization/test_ipex_quant.py"
+  docker exec cpu-test-"$NUMA_NODE" bash -c "
+    set -e
+    VLLM_USE_V1=0 pytest -s -v \
+    tests/quantization/test_ipex_quant.py"

  # Run chunked-prefill and prefix-cache test
  docker exec cpu-test-"$NUMA_NODE" bash -c "
--- a/.buildkite/scripts/hardware_ci/run-xpu-test.sh
+++ b/.buildkite/scripts/hardware_ci/run-xpu-test.sh
@@ -26,5 +26,7 @@ docker run \
    --name "${container_name}" \
    "${image_name}" \
    sh -c '
+    VLLM_USE_V1=0 python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m
+    VLLM_USE_V1=0 python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m -tp 2
    VLLM_USE_V1=1 python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m --block-size 64 --enforce-eager
 '