[Test] Only run the MLA model when the user explicitly sets it for batch invariance (#37719)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
Wentao Ye
2026-03-22 09:09:12 -04:00
committed by GitHub
parent 77d24c4bfe
commit eaf4978621
3 changed files with 23 additions and 27 deletions

View File

@@ -17,7 +17,7 @@ from typing import Any
import openai
import pytest
-from utils import BACKENDS, _random_prompt, resolve_model_name, skip_unsupported
+from utils import BACKENDS, TEST_MODEL, _random_prompt, skip_unsupported
from tests.utils import RemoteOpenAIServer
@@ -139,7 +139,6 @@ def test_logprobs_bitwise_batch_invariance_bs1_vs_bsN(
backend: str,
) -> None:
random.seed(int(os.getenv("VLLM_TEST_SEED", "12345")))
-model_name = resolve_model_name(backend)
prompts_all = [_random_prompt(10, 50) for _ in range(32)]
sp_kwargs: dict[str, Any] = {
@@ -159,11 +158,11 @@ def test_logprobs_bitwise_batch_invariance_bs1_vs_bsN(
if tp_size:
server_args += ["-tp", tp_size]
-with RemoteOpenAIServer(model_name, server_args) as server:
+with RemoteOpenAIServer(TEST_MODEL, server_args) as server:
client = server.get_client()
_compare_bs1_vs_bsn_single_process(
prompts=prompts_all,
sp_kwargs=sp_kwargs,
client=client,
-model_name=model_name,
+model_name=TEST_MODEL,
)