[QeRL] Fix online quantized reloading (#38442)

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
2026-03-29 16:56:41 -04:00
parent 995dea1354
commit d28d86e8a3
9 changed files with 104 additions and 62 deletions
--- a/.buildkite/test-amd.yaml
+++ b/.buildkite/test-amd.yaml
@@ -812,7 +812,7 @@ steps:
  commands:
  - apt-get update && apt-get install -y curl libsodium23
  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -v -s model_executor
+  - pytest -v -s model_executor -m '(not slow_test)'
  - pytest -v -s entrypoints/openai/completion/test_tensorizer_entrypoint.py


@@ -1242,7 +1242,7 @@ steps:
  - vllm/platforms/rocm.py
  commands:
  - TARGET_TEST_SUITE=L4 pytest basic_correctness/ -v -s -m 'distributed(num_gpus=2)'
-  - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s model_executor/model_loader/test_sharded_state_loader.py
+  - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s model_executor/model_loader/test_sharded_state_loader.py -m '(not slow_test)'
  - pytest models/test_transformers.py -v -s -m 'distributed(num_gpus=2)'
  - pytest models/language -v -s -m 'distributed(num_gpus=2)'
  - pytest models/multimodal -v -s -m 'distributed(num_gpus=2)' --ignore models/multimodal/generation/test_whisper.py
@@ -2501,7 +2501,7 @@ steps:
  - tests/models/
  commands:
  - TARGET_TEST_SUITE=L4 pytest basic_correctness/ -v -s -m 'distributed(num_gpus=2)'
-  - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s model_executor/model_loader/test_sharded_state_loader.py
+  - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s model_executor/model_loader/test_sharded_state_loader.py -m '(not slow_test)'
  - pytest models/test_transformers.py -v -s -m 'distributed(num_gpus=2)'
  - pytest models/language -v -s -m 'distributed(num_gpus=2)'
  - pytest models/multimodal -v -s -m 'distributed(num_gpus=2)' --ignore models/multimodal/generation/test_whisper.py