Convert formatting to use ruff instead of yapf + isort (#26247)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Harry Mellor
2025-10-05 15:06:22 +01:00
committed by GitHub
parent 17edd8a807
commit d6953beb91
1508 changed files with 115244 additions and 94146 deletions

View File

@@ -51,8 +51,9 @@ AITER_MODEL_LIST = [
pytest.param(
"google/gemma-1.1-2b-it", # gemma
marks=[
pytest.mark.core_model, pytest.mark.cpu_model,
pytest.mark.slow_test
pytest.mark.core_model,
pytest.mark.cpu_model,
pytest.mark.slow_test,
],
),
pytest.param(
@@ -65,8 +66,7 @@ AITER_MODEL_LIST = [
pytest.param(
"openbmb/MiniCPM3-4B",
# fused_moe not supported on CPU
marks=[pytest.mark.core_model,
large_gpu_mark(min_gb=32)],
marks=[pytest.mark.core_model, large_gpu_mark(min_gb=32)],
),
pytest.param(
"facebook/opt-125m", # opt
@@ -82,8 +82,9 @@ AITER_MODEL_LIST = [
pytest.param(
"Qwen/Qwen2.5-0.5B-Instruct", # qwen2
marks=[
pytest.mark.core_model, pytest.mark.cpu_model,
pytest.mark.slow_test
pytest.mark.core_model,
pytest.mark.cpu_model,
pytest.mark.slow_test,
],
),
pytest.param(
@@ -100,16 +101,25 @@ AITER_MODEL_LIST = [
marks=[pytest.mark.cpu_model],
),
pytest.param("swiss-ai/Apertus-8B-2509"), # apertus
])
],
)
@pytest.mark.parametrize("max_tokens", [32])
@pytest.mark.parametrize("num_logprobs", [5])
@pytest.mark.parametrize(
"use_rocm_aiter", [True, False] if current_platform.is_rocm() else [False])
"use_rocm_aiter", [True, False] if current_platform.is_rocm() else [False]
)
@pytest.mark.parametrize("use_prompt_embeds", [True, False])
def test_models(hf_runner, vllm_runner, example_prompts, model: str,
max_tokens: int, num_logprobs: int, use_rocm_aiter: bool,
use_prompt_embeds: bool, monkeypatch) -> None:
def test_models(
hf_runner,
vllm_runner,
example_prompts,
model: str,
max_tokens: int,
num_logprobs: int,
use_rocm_aiter: bool,
use_prompt_embeds: bool,
monkeypatch,
) -> None:
model_info = HF_EXAMPLE_MODELS.find_hf_info(model)
model_info.check_available_online(on_fail="skip")
model_info.check_transformers_version(on_fail="skip")
@@ -125,34 +135,37 @@ def test_models(hf_runner, vllm_runner, example_prompts, model: str,
with hf_runner(model) as hf_model:
hf_outputs = hf_model.generate_greedy_logprobs_limit(
example_prompts, max_tokens, num_logprobs)
example_prompts, max_tokens, num_logprobs
)
prompt_embeds: Optional[list[torch.Tensor]] = ([] if use_prompt_embeds
else None)
prompt_embeds: Optional[list[torch.Tensor]] = [] if use_prompt_embeds else None
prompt_token_ids = []
for prompt in example_prompts:
token_ids = hf_model.tokenizer(prompt,
return_tensors="pt").input_ids.to(
hf_model.model.device)
token_ids = hf_model.tokenizer(prompt, return_tensors="pt").input_ids.to(
hf_model.model.device
)
prompt_token_ids.append(token_ids)
if prompt_embeds is not None:
prompt_embeds.append(hf_model.model.get_input_embeddings()(
token_ids).squeeze(0))
prompt_embeds.append(
hf_model.model.get_input_embeddings()(token_ids).squeeze(0)
)
with vllm_runner(
model,
tokenizer_name=model_info.tokenizer or model,
tokenizer_mode=model_info.tokenizer_mode,
trust_remote_code=model_info.trust_remote_code,
max_num_seqs=2,
enable_prompt_embeds=use_prompt_embeds,
model,
tokenizer_name=model_info.tokenizer or model,
tokenizer_mode=model_info.tokenizer_mode,
trust_remote_code=model_info.trust_remote_code,
max_num_seqs=2,
enable_prompt_embeds=use_prompt_embeds,
) as vllm_model:
vllm_outputs = vllm_model.generate_greedy_logprobs(
example_prompts, max_tokens, num_logprobs)
example_prompts, max_tokens, num_logprobs
)
if prompt_embeds is not None:
vllm_outputs_from_embeds = vllm_model.generate_greedy_logprobs(
prompt_embeds, max_tokens, num_logprobs)
prompt_embeds, max_tokens, num_logprobs
)
check_logprobs_close(
outputs_0_lst=hf_outputs,