[ROCm][CI] Fix tool use test stability - disable skinny GEMM, prefix caching, eliminate batch variance (#35553)

Signed-off-by: Andreas Karatzas <akaratza@amd.com>
This commit is contained in:
Andreas Karatzas
2026-03-06 01:15:12 -06:00
committed by GitHub
parent 5afb387bd4
commit 807d680337
5 changed files with 33 additions and 17 deletions

View File

@@ -109,6 +109,20 @@ else:
# Resolved relative to this file: the first `.parent` is the directory that
# contains this file, the second is that directory's parent.
VLLM_PATH = Path(__file__).parent.parent
"""Path to root of the vLLM repository."""
# Environment overrides applied only on ROCm. Skinny GEMM is switched off
# there because the atomic reductions in the wvSplitKrc kernel produce
# non-deterministic results.
# See: https://github.com/vllm-project/vllm/pull/33493#issuecomment-3906083975
if current_platform.is_rocm():
    ROCM_ENV_OVERRIDES = {"VLLM_ROCM_USE_SKINNY_GEMM": "0"}
else:
    # Every other platform gets an empty mapping.
    ROCM_ENV_OVERRIDES = {}
# Extra server CLI arguments applied only on ROCm. They turn off prefix
# caching and eliminate batch variance so that tests are less flaky.
if current_platform.is_rocm():
    ROCM_EXTRA_ARGS = ["--no-enable-prefix-caching", "--max-num-seqs", "1"]
else:
    # Every other platform gets no extra arguments.
    ROCM_EXTRA_ARGS = []
class RemoteVLLMServer:
"""Base class for launching vLLM server subprocesses for testing.