[Bug] Fix torch dynamo warning Dynamo detected a call to a functools.lru_cache (#29038)
Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
import os
|
||||
import random
|
||||
|
||||
import pytest
|
||||
@@ -12,6 +13,25 @@ skip_unsupported = pytest.mark.skipif(
|
||||
reason="Requires CUDA and >= Hopper (SM90)",
|
||||
)
|
||||
|
||||
# Attention backends exercised by this test module.
BACKENDS: list[str] = ["FLASH_ATTN", "FLASHINFER"]

# The MLA flash-attention backend needs CUDA on Hopper (SM90) hardware,
# so it is only added when the current platform supports it.
if current_platform.is_cuda() and current_platform.is_device_capability(90):
    BACKENDS += ["FLASH_ATTN_MLA"]
|
||||
|
||||
# Default model for non-MLA backends; overridable via VLLM_TEST_MODEL.
DEFAULT_MODEL = "Qwen/Qwen3-1.7B"
# MLA backends need an MLA-capable architecture, hence a dedicated default.
MLA_MODEL = "deepseek-ai/DeepSeek-V2-Lite-Chat"


def resolve_model_name(backend: str) -> str:
    """Resolve the model name for the given backend.

    Honors the ``VLLM_TEST_MODEL`` environment variable when set; otherwise
    falls back to ``DEFAULT_MODEL``, substituting ``MLA_MODEL`` for MLA
    backends (which the default model does not support).
    """
    chosen = os.getenv("VLLM_TEST_MODEL", DEFAULT_MODEL)
    # Only swap in the MLA model when the user did not explicitly pick one.
    needs_mla_default = backend.endswith("MLA") and chosen == DEFAULT_MODEL
    return MLA_MODEL if needs_mla_default else chosen
|
||||
|
||||
|
||||
def _random_prompt(min_words: int = 1024, max_words: int = 1024 * 2) -> str:
|
||||
# Generate more realistic prompts that will actually produce varied tokens
|
||||
|
||||
Reference in New Issue
Block a user