[Compile] Fix torch warning "TensorFloat32 tensor cores for float32 matrix multiplication available but not enabled" (#29897)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
2025-12-09 10:40:37 -05:00
parent 56037dfa2f
commit 83319b44c2
3 changed files with 15 additions and 0 deletions
--- a/tests/v1/e2e/test_async_scheduling.py
+++ b/tests/v1/e2e/test_async_scheduling.py
@@ -124,6 +124,8 @@ def run_tests(
    with monkeypatch.context() as m:
        # avoid precision errors
        m.setenv("VLLM_ATTENTION_BACKEND", "FLEX_ATTENTION")
+        # lock matmul precision to full FP32
+        m.setenv("VLLM_FLOAT32_MATMUL_PRECISION", "highest")
        # m.setenv("VLLM_BATCH_INVARIANT", "1")
        outputs: list[tuple[str, list, list]] = []
        for n, (