[ROCm][CI] Fix tool use test stability - disable skinny GEMM, prefix caching, eliminate batch variance (#35553)
Signed-off-by: Andreas Karatzas <akaratza@amd.com>
This commit is contained in:
@@ -109,6 +109,20 @@ else:
|
||||
VLLM_PATH = Path(__file__).parent.parent
|
||||
"""Path to root of the vLLM repository."""
|
||||
|
||||
# Environment overrides applied only when running on ROCm.
# The skinny GEMM path is switched off there because the wvSplitKrc kernel
# relies on atomic reductions, which yield non-deterministic results.
# See: https://github.com/vllm-project/vllm/pull/33493#issuecomment-3906083975
if current_platform.is_rocm():
    ROCM_ENV_OVERRIDES = {"VLLM_ROCM_USE_SKINNY_GEMM": "0"}
else:
    # Non-ROCm platforms get no overrides.
    ROCM_ENV_OVERRIDES = {}
|
||||
# Extra server CLI arguments applied only when running on ROCm.
# Prefix caching is turned off and batch variance is eliminated
# (--max-num-seqs 1) to reduce test flakiness.
if current_platform.is_rocm():
    ROCM_EXTRA_ARGS = ["--no-enable-prefix-caching", "--max-num-seqs", "1"]
else:
    # Non-ROCm platforms get no extra arguments.
    ROCM_EXTRA_ARGS = []
|
||||
|
||||
|
||||
class RemoteVLLMServer:
|
||||
"""Base class for launching vLLM server subprocesses for testing.
|
||||
|
||||
Reference in New Issue
Block a user