Convert formatting to use ruff instead of yapf + isort (#26247)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -18,20 +18,25 @@ from ...utils import check_embeddings_close
|
||||
# case won't pass because gte-Qwen2-1.5B-instruct will cache custom
|
||||
# model code with bidirectional attention.
|
||||
# [Decoder-only]
|
||||
pytest.param("BAAI/bge-multilingual-gemma2",
|
||||
marks=[pytest.mark.core_model, pytest.mark.slow_test]),
|
||||
pytest.param(
|
||||
"BAAI/bge-multilingual-gemma2",
|
||||
marks=[pytest.mark.core_model, pytest.mark.slow_test],
|
||||
),
|
||||
pytest.param(
|
||||
"intfloat/e5-mistral-7b-instruct",
|
||||
# CPU v1 doesn't support sliding window
|
||||
marks=[pytest.mark.core_model]),
|
||||
pytest.param("ssmits/Qwen2-7B-Instruct-embed-base",
|
||||
marks=[pytest.mark.cpu_model]),
|
||||
marks=[pytest.mark.core_model],
|
||||
),
|
||||
pytest.param(
|
||||
"ssmits/Qwen2-7B-Instruct-embed-base", marks=[pytest.mark.cpu_model]
|
||||
),
|
||||
# [Encoder-only]
|
||||
pytest.param(
|
||||
"BAAI/bge-base-en-v1.5",
|
||||
marks=[
|
||||
pytest.mark.core_model, pytest.mark.cpu_model,
|
||||
pytest.mark.slow_test
|
||||
pytest.mark.core_model,
|
||||
pytest.mark.cpu_model,
|
||||
pytest.mark.slow_test,
|
||||
],
|
||||
),
|
||||
pytest.param("sentence-transformers/all-MiniLM-L12-v2"),
|
||||
@@ -50,7 +55,6 @@ def test_models(
|
||||
model,
|
||||
monkeypatch,
|
||||
) -> None:
|
||||
|
||||
if model == "BAAI/bge-multilingual-gemma2" and current_platform.is_rocm():
|
||||
# ROCm Triton FA does not currently support sliding window attention
|
||||
# switch to use ROCm CK FA backend
|
||||
@@ -58,13 +62,14 @@ def test_models(
|
||||
|
||||
vllm_extra_kwargs = {}
|
||||
if model == "ssmits/Qwen2-7B-Instruct-embed-base":
|
||||
vllm_extra_kwargs["pooler_config"] = \
|
||||
PoolerConfig(pooling_type="MEAN", normalize=False)
|
||||
vllm_extra_kwargs["pooler_config"] = PoolerConfig(
|
||||
pooling_type="MEAN", normalize=False
|
||||
)
|
||||
|
||||
max_model_len: Optional[int] = 512
|
||||
if model in [
|
||||
"sentence-transformers/all-MiniLM-L12-v2",
|
||||
"sentence-transformers/stsb-roberta-base-v2"
|
||||
"sentence-transformers/all-MiniLM-L12-v2",
|
||||
"sentence-transformers/stsb-roberta-base-v2",
|
||||
]:
|
||||
max_model_len = None
|
||||
|
||||
@@ -79,10 +84,9 @@ def test_models(
|
||||
with hf_runner(model, is_sentence_transformer=True) as hf_model:
|
||||
hf_outputs = hf_model.encode(example_prompts)
|
||||
|
||||
with vllm_runner(model,
|
||||
runner="pooling",
|
||||
max_model_len=max_model_len,
|
||||
**vllm_extra_kwargs) as vllm_model:
|
||||
with vllm_runner(
|
||||
model, runner="pooling", max_model_len=max_model_len, **vllm_extra_kwargs
|
||||
) as vllm_model:
|
||||
vllm_outputs = vllm_model.embed(example_prompts)
|
||||
|
||||
check_embeddings_close(
|
||||
|
||||
Reference in New Issue
Block a user