[ROCm][CI] Fix plugin tests (2 GPUs) failures on ROCm and remove VLLM_FLOAT32_MATMUL_PRECISION from all ROCm tests (#31829)

Signed-off-by: Andreas Karatzas <akaratza@amd.com>
2026-01-06 19:12:23 -06:00
parent 9a1d20a89c
commit 364a8bc6dc
2 changed files with 5 additions and 9 deletions
--- a/.buildkite/test-amd.yaml
+++ b/.buildkite/test-amd.yaml
@@ -163,9 +163,7 @@ steps:
  commands:
  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
  - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/test_vision_embeds.py
-  # Need tf32 to avoid conflicting precision issue with terratorch on ROCm.
-  # TODO: Remove after next torch update
-  - VLLM_FLOAT32_MATMUL_PRECISION="tf32" pytest -v -s entrypoints/openai/test_vision_embeds.py
+  - pytest -v -s entrypoints/openai/test_vision_embeds.py
  - pytest -v -s entrypoints/test_chat_utils.py

 - label: Entrypoints Integration Test (API Server 2)
@@ -989,9 +987,7 @@ steps:
    - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
    - pip freeze | grep -E 'torch'
    - pytest -v -s models/multimodal -m core_model --ignore models/multimodal/generation/test_whisper.py --ignore models/multimodal/processing --ignore models/multimodal/pooling/test_prithvi_mae.py
-    # Need tf32 to avoid conflicting precision issue with terratorch on ROCm.
-    # TODO: Remove after next torch update
-    - VLLM_FLOAT32_MATMUL_PRECISION="tf32" pytest -v -s models/multimodal/pooling/test_prithvi_mae.py -m core_model
+    - pytest -v -s models/multimodal/pooling/test_prithvi_mae.py -m core_model
    - cd .. && VLLM_WORKER_MULTIPROC_METHOD=spawn pytest -v -s tests/models/multimodal/generation/test_whisper.py -m core_model  # Otherwise, mp_method="spawn" doesn't work

 - label: Multi-Modal Accuracy Eval (Small Models) # 5min
@@ -1356,9 +1352,7 @@ steps:
  # end platform plugin tests
  # begin io_processor plugins test, all the code in between uses the prithvi_io_processor plugin
  - pip install -e ./plugins/prithvi_io_processor_plugin
-  # Need tf32 to avoid conflicting precision issue with terratorch on ROCm.
-  # TODO: Remove after next torch update
-  - VLLM_FLOAT32_MATMUL_PRECISION="tf32" pytest -v -s plugins_tests/test_io_processor_plugins.py
+  - pytest -v -s plugins_tests/test_io_processor_plugins.py
  - pip uninstall prithvi_io_processor_plugin -y
  # end io_processor plugins test
  # begin stat_logger plugins test
--- a/requirements/rocm-test.txt
+++ b/requirements/rocm-test.txt
@@ -88,3 +88,5 @@ open-clip-torch==2.32.0
 perceptron==0.1.4
 # Required for the multi-modal models test
 timm==1.0.17
+# Required for plugins test
+albumentations==1.4.6