diff --git a/.buildkite/scripts/hardware_ci/run-tpu-v1-test-part2.sh b/.buildkite/scripts/hardware_ci/run-tpu-v1-test-part2.sh index 10d2e2364..b571618f4 100755 --- a/.buildkite/scripts/hardware_ci/run-tpu-v1-test-part2.sh +++ b/.buildkite/scripts/hardware_ci/run-tpu-v1-test-part2.sh @@ -128,7 +128,7 @@ run_and_track_test() { # --- Actual Test Execution --- run_and_track_test 1 "test_struct_output_generate.py" \ - "HF_HUB_DISABLE_XET=1 python3 -m pytest -s -v /workspace/vllm/tests/v1/entrypoints/llm/test_struct_output_generate.py -k \"not test_structured_output_with_reasoning_matrices\"" + "python3 -m pytest -s -v /workspace/vllm/tests/v1/entrypoints/llm/test_struct_output_generate.py -k \"not test_structured_output_with_reasoning_matrices\"" run_and_track_test 2 "test_moe_pallas.py" \ "python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_moe_pallas.py" run_and_track_test 3 "test_lora.py" \ diff --git a/.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh b/.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh index 9e7b5a546..d55a786e4 100755 --- a/.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh +++ b/.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh @@ -134,7 +134,7 @@ run_and_track_test 1 "test_compilation.py" \ run_and_track_test 2 "test_basic.py" \ "python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_basic.py" run_and_track_test 3 "test_accuracy.py::test_lm_eval_accuracy_v1_engine" \ - "HF_HUB_DISABLE_XET=1 python3 -m pytest -s -v /workspace/vllm/tests/entrypoints/llm/test_accuracy.py::test_lm_eval_accuracy_v1_engine" + "python3 -m pytest -s -v /workspace/vllm/tests/entrypoints/llm/test_accuracy.py::test_lm_eval_accuracy_v1_engine" run_and_track_test 4 "test_quantization_accuracy.py" \ "python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_quantization_accuracy.py" run_and_track_test 5 "examples/offline_inference/tpu.py" \ diff --git a/tests/entrypoints/llm/test_accuracy.py b/tests/entrypoints/llm/test_accuracy.py index 39bc8ab07..5d605e906 100644 --- a/tests/entrypoints/llm/test_accuracy.py +++ b/tests/entrypoints/llm/test_accuracy.py @@ -96,9 +96,6 @@ def test_lm_eval_accuracy_v1_engine_fp8_kv_cache( more_args = None if current_platform.is_tpu(): # Limit compilation time for TPU V1 - - # xet doesn't work well for Qwen/Qwen3-1.7B - m.setenv("HF_HUB_DISABLE_XET", "1") more_args = "max_model_len=2048,max_num_seqs=128,kv_cache_dtype=fp8" # Add TP test (if provided)