[Bug] Fix torch dynamo warning "Dynamo detected a call to a functools.lru_cache" (#29038)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
Author: Wentao Ye
Date: 2025-11-20 03:52:23 -05:00
Committed-by: GitHub
Parent: 1e1c06789e
Commit: 2c52c7fd9a
5 changed files with 52 additions and 40 deletions
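
For context on the warning being fixed: Dynamo emits "Dynamo detected a call to a functools.lru_cache" when a `torch.compile`d region calls an `lru_cache`-wrapped function, since it cannot soundly trace through the cache wrapper. Below is a minimal sketch of the pattern and the usual remedy of resolving the cached value outside the compiled region; the helper name and constant are hypothetical illustrations, not code from this commit.

```python
import functools

import torch


@functools.lru_cache(maxsize=None)
def get_block_size() -> int:
    # Hypothetical cached helper, not taken from this commit.
    return 16


# Before: Dynamo sees the lru_cache-wrapped call while tracing and warns.
@torch.compile
def forward_bad(x: torch.Tensor) -> torch.Tensor:
    return x * get_block_size()


# After: resolve the cached value once, eagerly, outside the traced region.
_BLOCK_SIZE = get_block_size()


@torch.compile
def forward_good(x: torch.Tensor) -> torch.Tensor:
    return x * _BLOCK_SIZE
```

Hoisting the lookup keeps the compiled graph free of the cache wrapper, so Dynamo has nothing to warn about.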


@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import os
 import random
 
 import pytest
@@ -12,6 +13,25 @@ skip_unsupported = pytest.mark.skipif(
     reason="Requires CUDA and >= Hopper (SM90)",
 )
 
+BACKENDS: list[str] = [
+    "FLASH_ATTN",
+    "FLASHINFER",
+]
+if current_platform.is_cuda() and current_platform.is_device_capability(90):
+    BACKENDS.append("FLASH_ATTN_MLA")
+
+DEFAULT_MODEL = "Qwen/Qwen3-1.7B"
+MLA_MODEL = "deepseek-ai/DeepSeek-V2-Lite-Chat"
+
+
+def resolve_model_name(backend: str) -> str:
+    """Resolve the model name for the given backend."""
+    model = os.getenv("VLLM_TEST_MODEL", DEFAULT_MODEL)
+    if backend.endswith("MLA") and model == DEFAULT_MODEL:
+        return MLA_MODEL
+    return model
+
+
 def _random_prompt(min_words: int = 1024, max_words: int = 1024 * 2) -> str:
     # Generate more realistic prompts that will actually produce varied tokens