[CI] Split pooling from entrypoints Test (#24632)
Signed-off-by: wang.yuqi <noooop@126.com>
This commit is contained in:
@@ -113,7 +113,7 @@ steps:
|
|||||||
- tests/entrypoints/
|
- tests/entrypoints/
|
||||||
commands:
|
commands:
|
||||||
- pytest -v -s entrypoints/openai/tool_parsers
|
- pytest -v -s entrypoints/openai/tool_parsers
|
||||||
- pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py
|
- pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling
|
||||||
|
|
||||||
- label: Entrypoints Integration Test (LLM) # 30min
|
- label: Entrypoints Integration Test (LLM) # 30min
|
||||||
timeout_in_minutes: 40
|
timeout_in_minutes: 40
|
||||||
@@ -148,6 +148,19 @@ steps:
|
|||||||
- pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/test_collective_rpc.py --ignore=entrypoints/openai/tool_parsers/
|
- pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/test_collective_rpc.py --ignore=entrypoints/openai/tool_parsers/
|
||||||
- pytest -v -s entrypoints/test_chat_utils.py
|
- pytest -v -s entrypoints/test_chat_utils.py
|
||||||
|
|
||||||
|
- label: Entrypoints Integration Test (Pooling)
|
||||||
|
timeout_in_minutes: 50
|
||||||
|
mirror_hardwares: [amdexperimental]
|
||||||
|
working_dir: "/vllm-workspace/tests"
|
||||||
|
fast_check: true
|
||||||
|
torch_nightly: true
|
||||||
|
source_file_dependencies:
|
||||||
|
- vllm/
|
||||||
|
- tests/entrypoints/pooling
|
||||||
|
commands:
|
||||||
|
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
|
||||||
|
- pytest -v -s entrypoints/pooling
|
||||||
|
|
||||||
- label: Distributed Tests (4 GPUs) # 35min
|
- label: Distributed Tests (4 GPUs) # 35min
|
||||||
timeout_in_minutes: 50
|
timeout_in_minutes: 50
|
||||||
mirror_hardwares: [amdexperimental]
|
mirror_hardwares: [amdexperimental]
|
||||||
|
|||||||
0
tests/entrypoints/pooling/__init__.py
Normal file
0
tests/entrypoints/pooling/__init__.py
Normal file
0
tests/entrypoints/pooling/correctness/__init__.py
Normal file
0
tests/entrypoints/pooling/correctness/__init__.py
Normal file
0
tests/entrypoints/pooling/llm/__init__.py
Normal file
0
tests/entrypoints/pooling/llm/__init__.py
Normal file
@@ -6,11 +6,10 @@ import weakref
|
|||||||
import pytest
|
import pytest
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
from tests.models.utils import softmax
|
||||||
from vllm import LLM, PoolingParams
|
from vllm import LLM, PoolingParams
|
||||||
from vllm.distributed import cleanup_dist_env_and_memory
|
from vllm.distributed import cleanup_dist_env_and_memory
|
||||||
|
|
||||||
from ...models.utils import softmax
|
|
||||||
|
|
||||||
MODEL_NAME = "jason9693/Qwen2.5-1.5B-apeach"
|
MODEL_NAME = "jason9693/Qwen2.5-1.5B-apeach"
|
||||||
|
|
||||||
prompts = ["The chef prepared a delicious meal."]
|
prompts = ["The chef prepared a delicious meal."]
|
||||||
@@ -6,11 +6,10 @@ import weakref
|
|||||||
import pytest
|
import pytest
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
from tests.models.utils import softmax
|
||||||
from vllm import LLM, PoolingParams
|
from vllm import LLM, PoolingParams
|
||||||
from vllm.distributed import cleanup_dist_env_and_memory
|
from vllm.distributed import cleanup_dist_env_and_memory
|
||||||
|
|
||||||
from ...models.utils import softmax
|
|
||||||
|
|
||||||
MODEL_NAME = "internlm/internlm2-1_8b-reward"
|
MODEL_NAME = "internlm/internlm2-1_8b-reward"
|
||||||
|
|
||||||
prompts = ["The chef prepared a delicious meal."]
|
prompts = ["The chef prepared a delicious meal."]
|
||||||
@@ -6,11 +6,10 @@ import weakref
|
|||||||
import pytest
|
import pytest
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
from tests.models.utils import softmax
|
||||||
from vllm import LLM, PoolingParams
|
from vllm import LLM, PoolingParams
|
||||||
from vllm.distributed import cleanup_dist_env_and_memory
|
from vllm.distributed import cleanup_dist_env_and_memory
|
||||||
|
|
||||||
from ...models.utils import softmax
|
|
||||||
|
|
||||||
MODEL_NAME = "tomaarsen/Qwen3-Reranker-0.6B-seq-cls"
|
MODEL_NAME = "tomaarsen/Qwen3-Reranker-0.6B-seq-cls"
|
||||||
|
|
||||||
|
|
||||||
0
tests/entrypoints/pooling/openai/__init__.py
Normal file
0
tests/entrypoints/pooling/openai/__init__.py
Normal file
@@ -6,10 +6,9 @@ import requests
|
|||||||
import torch
|
import torch
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
|
|
||||||
|
from tests.utils import RemoteOpenAIServer
|
||||||
from vllm.entrypoints.openai.protocol import ClassificationResponse
|
from vllm.entrypoints.openai.protocol import ClassificationResponse
|
||||||
|
|
||||||
from ...utils import RemoteOpenAIServer
|
|
||||||
|
|
||||||
MODEL_NAME = "jason9693/Qwen2.5-1.5B-apeach"
|
MODEL_NAME = "jason9693/Qwen2.5-1.5B-apeach"
|
||||||
DTYPE = "float32" # Use float32 to avoid NaN issue
|
DTYPE = "float32" # Use float32 to avoid NaN issue
|
||||||
|
|
||||||
@@ -11,14 +11,13 @@ import requests
|
|||||||
import torch
|
import torch
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
|
|
||||||
|
from tests.models.language.pooling.embed_utils import (
|
||||||
|
run_embedding_correctness_test)
|
||||||
|
from tests.models.utils import check_embeddings_close
|
||||||
|
from tests.utils import RemoteOpenAIServer
|
||||||
from vllm.entrypoints.openai.protocol import EmbeddingResponse
|
from vllm.entrypoints.openai.protocol import EmbeddingResponse
|
||||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||||
|
|
||||||
from ...models.language.pooling.embed_utils import (
|
|
||||||
run_embedding_correctness_test)
|
|
||||||
from ...models.utils import check_embeddings_close
|
|
||||||
from ...utils import RemoteOpenAIServer
|
|
||||||
|
|
||||||
MODEL_NAME = "intfloat/multilingual-e5-small"
|
MODEL_NAME = "intfloat/multilingual-e5-small"
|
||||||
DUMMY_CHAT_TEMPLATE = """{% for message in messages %}{{message['role'] + ': ' + message['content'] + '\\n'}}{% endfor %}""" # noqa: E501
|
DUMMY_CHAT_TEMPLATE = """{% for message in messages %}{{message['role'] + ': ' + message['content'] + '\\n'}}{% endfor %}""" # noqa: E501
|
||||||
DTYPE = "bfloat16"
|
DTYPE = "bfloat16"
|
||||||
@@ -9,13 +9,12 @@ from typing import Optional
|
|||||||
import openai
|
import openai
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from vllm.entrypoints.openai.protocol import EmbeddingResponse
|
from tests.conftest import HfRunner
|
||||||
|
from tests.models.language.pooling.embed_utils import (
|
||||||
from ...conftest import HfRunner
|
|
||||||
from ...models.language.pooling.embed_utils import (
|
|
||||||
run_embedding_correctness_test)
|
run_embedding_correctness_test)
|
||||||
from ...models.utils import EmbedModelInfo
|
from tests.models.utils import EmbedModelInfo
|
||||||
from ...utils import RemoteOpenAIServer
|
from tests.utils import RemoteOpenAIServer
|
||||||
|
from vllm.entrypoints.openai.protocol import EmbeddingResponse
|
||||||
|
|
||||||
MODELS = [
|
MODELS = [
|
||||||
EmbedModelInfo("intfloat/multilingual-e5-small", is_matryoshka=False),
|
EmbedModelInfo("intfloat/multilingual-e5-small", is_matryoshka=False),
|
||||||
@@ -14,10 +14,9 @@ import openai
|
|||||||
import pytest
|
import pytest
|
||||||
import pytest_asyncio
|
import pytest_asyncio
|
||||||
|
|
||||||
|
from tests.utils import RemoteOpenAIServer
|
||||||
from vllm.entrypoints.openai.protocol import EmbeddingResponse
|
from vllm.entrypoints.openai.protocol import EmbeddingResponse
|
||||||
|
|
||||||
from ...utils import RemoteOpenAIServer
|
|
||||||
|
|
||||||
|
|
||||||
def _generate_random_text(word_count: int) -> str:
|
def _generate_random_text(word_count: int) -> str:
|
||||||
"""Generate random text with approximately the specified word count."""
|
"""Generate random text with approximately the specified word count."""
|
||||||
@@ -8,11 +8,10 @@ import pytest
|
|||||||
import requests
|
import requests
|
||||||
|
|
||||||
from tests.models.utils import check_embeddings_close
|
from tests.models.utils import check_embeddings_close
|
||||||
|
from tests.utils import RemoteOpenAIServer
|
||||||
from vllm.entrypoints.openai.protocol import PoolingResponse
|
from vllm.entrypoints.openai.protocol import PoolingResponse
|
||||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||||
|
|
||||||
from ...utils import RemoteOpenAIServer
|
|
||||||
|
|
||||||
MODEL_NAME = "internlm/internlm2-1_8b-reward"
|
MODEL_NAME = "internlm/internlm2-1_8b-reward"
|
||||||
DUMMY_CHAT_TEMPLATE = """{% for message in messages %}{{message['role'] + ': ' + message['content'] + '\\n'}}{% endfor %}""" # noqa: E501
|
DUMMY_CHAT_TEMPLATE = """{% for message in messages %}{{message['role'] + ': ' + message['content'] + '\\n'}}{% endfor %}""" # noqa: E501
|
||||||
|
|
||||||
@@ -6,10 +6,9 @@ import requests
|
|||||||
import torch
|
import torch
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
|
|
||||||
|
from tests.utils import RemoteOpenAIServer
|
||||||
from vllm.entrypoints.openai.protocol import RerankResponse
|
from vllm.entrypoints.openai.protocol import RerankResponse
|
||||||
|
|
||||||
from ...utils import RemoteOpenAIServer
|
|
||||||
|
|
||||||
MODEL_NAME = "BAAI/bge-reranker-base"
|
MODEL_NAME = "BAAI/bge-reranker-base"
|
||||||
DTYPE = "bfloat16"
|
DTYPE = "bfloat16"
|
||||||
|
|
||||||
@@ -8,10 +8,9 @@ import torch
|
|||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
from torch import tensor
|
from torch import tensor
|
||||||
|
|
||||||
|
from tests.utils import RemoteOpenAIServer
|
||||||
from vllm.entrypoints.openai.protocol import ScoreResponse
|
from vllm.entrypoints.openai.protocol import ScoreResponse
|
||||||
|
|
||||||
from ...utils import RemoteOpenAIServer
|
|
||||||
|
|
||||||
MODELS = [
|
MODELS = [
|
||||||
{
|
{
|
||||||
"name": "BAAI/bge-reranker-v2-m3",
|
"name": "BAAI/bge-reranker-v2-m3",
|
||||||
@@ -7,11 +7,10 @@ import pytest
|
|||||||
import requests
|
import requests
|
||||||
from transformers import AutoProcessor
|
from transformers import AutoProcessor
|
||||||
|
|
||||||
|
from tests.utils import VLLM_PATH, RemoteOpenAIServer
|
||||||
from vllm.entrypoints.openai.protocol import EmbeddingResponse
|
from vllm.entrypoints.openai.protocol import EmbeddingResponse
|
||||||
from vllm.multimodal.utils import encode_image_base64, fetch_image
|
from vllm.multimodal.utils import encode_image_base64, fetch_image
|
||||||
|
|
||||||
from ...utils import VLLM_PATH, RemoteOpenAIServer
|
|
||||||
|
|
||||||
MODEL_NAME = "TIGER-Lab/VLM2Vec-Full"
|
MODEL_NAME = "TIGER-Lab/VLM2Vec-Full"
|
||||||
MAXIMUM_IMAGES = 2
|
MAXIMUM_IMAGES = 2
|
||||||
|
|
||||||
Reference in New Issue
Block a user