[CI] Move applicable tests to CPU (#24080)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Reza Barazesh
2025-09-30 09:45:20 -04:00
committed by GitHub
parent 80608ba5af
commit bc546f76a1
39 changed files with 136 additions and 28 deletions

View File

@@ -1,10 +1,13 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
import torch
from vllm.model_executor.models.utils import AutoWeightsLoader
pytestmark = pytest.mark.cpu_test
class ModuleWithBatchNorm(torch.nn.Module):

View File

@@ -16,6 +16,8 @@ from vllm.model_executor.models.vision import (
from vllm.platforms import current_platform
from vllm.utils import get_open_port, update_environment_variables
pytestmark = pytest.mark.cpu_test
@pytest.mark.parametrize(
("select_layers", "num_layers_loaded", "max_possible_layers",

View File

@@ -19,6 +19,8 @@ from vllm.multimodal.inputs import (MultiModalFieldElem, MultiModalKwargsItem,
MultiModalSharedField)
from vllm.multimodal.processing import PromptInsertion
pytestmark = pytest.mark.cpu_test
def _dummy_elem(
modality: str,

View File

@@ -10,6 +10,8 @@ from PIL import Image, ImageDraw
from vllm.multimodal.hasher import MultiModalHasher
pytestmark = pytest.mark.cpu_test
ASSETS_DIR = Path(__file__).parent / "assets"
assert ASSETS_DIR.exists()

View File

@@ -8,6 +8,8 @@ from PIL import Image, ImageChops
from vllm.multimodal.image import ImageMediaIO, convert_image_mode
pytestmark = pytest.mark.cpu_test
ASSETS_DIR = Path(__file__).parent / "assets"
assert ASSETS_DIR.exists()

View File

@@ -1,10 +1,13 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
import torch
from vllm.multimodal.inputs import MultiModalKwargs, NestedTensors
pytestmark = pytest.mark.cpu_test
def assert_nested_tensors_equal(expected: NestedTensors,
actual: NestedTensors):

View File

@@ -25,6 +25,8 @@ from vllm.transformers_utils.tokenizer import AnyTokenizer
from .utils import random_image
pytestmark = pytest.mark.cpu_test
# yapf: disable
@pytest.mark.parametrize(

View File

@@ -11,6 +11,8 @@ from vllm.multimodal import MULTIMODAL_REGISTRY
from ..models.utils import build_model_context
pytestmark = pytest.mark.cpu_test
@pytest.mark.parametrize(
"model_id,limit_mm_per_prompt,expected",

View File

@@ -17,6 +17,8 @@ from vllm.multimodal.video import (VIDEO_LOADER_REGISTRY, VideoLoader,
from .utils import cosine_similarity, create_video_from_image, normalize_image
pytestmark = pytest.mark.cpu_test
NUM_FRAMES = 10
FAKE_OUTPUT_1 = np.random.rand(NUM_FRAMES, 1280, 720, 3)
FAKE_OUTPUT_2 = np.random.rand(NUM_FRAMES, 1280, 720, 3)

View File

@@ -6,6 +6,8 @@ import pytest
from vllm.inputs import zip_enc_dec_prompts
from vllm.inputs.parse import parse_and_batch_prompt
pytestmark = pytest.mark.cpu_test
STRING_INPUTS = [
'',
'foo',

View File

@@ -1,8 +1,12 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
from vllm.outputs import RequestOutput
pytestmark = pytest.mark.cpu_test
def test_request_output_forward_compatible():
output = RequestOutput(request_id="test_request_id",

View File

@@ -12,7 +12,7 @@ from .utils import ARGS, CONFIGS, ServerConfig
# for each server config, download the model and return the config
-@pytest.fixture(scope="session", params=CONFIGS.keys())
+@pytest.fixture(scope="package", params=CONFIGS.keys())
def server_config(request):
config = CONFIGS[request.param]
@@ -26,7 +26,7 @@ def server_config(request):
# run this for each server config
-@pytest.fixture(scope="session")
+@pytest.fixture(scope="package")
def server(request, server_config: ServerConfig):
model = server_config["model"]
args_for_model = server_config["arguments"]

View File

@@ -10,6 +10,8 @@ from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
from vllm.entrypoints.openai.tool_parsers import Glm4MoeModelToolParser
from vllm.transformers_utils.tokenizer import get_tokenizer
pytestmark = pytest.mark.cpu_test
pytest.skip("skip glm4_moe parser test", allow_module_level=True)
# Use a common model that is likely to be available
MODEL = "zai-org/GLM-4.5"

View File

@@ -15,6 +15,8 @@ from vllm.entrypoints.openai.tool_parsers import JambaToolParser
from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally
from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer
pytestmark = pytest.mark.cpu_test
MODEL = "ai21labs/Jamba-tiny-dev"

View File

@@ -10,6 +10,8 @@ from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
from vllm.entrypoints.openai.tool_parsers import KimiK2ToolParser
from vllm.transformers_utils.tokenizer import get_tokenizer
pytestmark = pytest.mark.cpu_test
# Use a common model that is likely to be available
MODEL = "moonshotai/Kimi-K2-Instruct"

View File

@@ -12,6 +12,8 @@ from vllm.entrypoints.openai.protocol import (ChatCompletionToolsParam,
from vllm.entrypoints.openai.tool_parsers import MinimaxToolParser
from vllm.transformers_utils.tokenizer import get_tokenizer
pytestmark = pytest.mark.cpu_test
# Use a common model that is likely to be available
MODEL = "MiniMaxAi/MiniMax-M1-40k"

View File

@@ -18,6 +18,8 @@ from vllm.entrypoints.openai.tool_parsers.qwen3xml_tool_parser import (
from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally
from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer
pytestmark = pytest.mark.cpu_test
MODEL = "Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8"

View File

@@ -16,6 +16,8 @@ from vllm.entrypoints.openai.tool_parsers import SeedOssToolParser
from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally
from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer
pytestmark = pytest.mark.cpu_test
# Use a common model that is likely to be available
MODEL = "ByteDance-Seed/Seed-OSS-36B-Instruct"

View File

@@ -12,6 +12,8 @@ from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
ChatCompletionToolsParam)
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
pytestmark = pytest.mark.cpu_test
EXAMPLE_TOOLS = [
{
"type": "function",

View File

@@ -14,6 +14,8 @@ from vllm.entrypoints.openai.tool_parsers import xLAMToolParser
from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally
from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer
pytestmark = pytest.mark.cpu_test
# Use a common model that is likely to be available
MODEL = "Salesforce/Llama-xLAM-2-8B-fc-r"

View File

@@ -11,6 +11,8 @@ from vllm.v1.utils import ConstantList
from .utils import create_requests, create_scheduler
pytestmark = pytest.mark.cpu_test
def _make_model_runner_output(
scheduler_output: SchedulerOutput, ) -> ModelRunnerOutput:

View File

@@ -1,9 +1,12 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
from vllm.multimodal.inputs import MultiModalFeatureSpec, PlaceholderRange
from vllm.v1.core.encoder_cache_manager import EncoderCacheManager
pytestmark = pytest.mark.cpu_test
# ------------------ Mock Classes ------------------ #
class MockRequest:

View File

@@ -32,6 +32,8 @@ from vllm.v1.request import Request
# yapf: enable
pytestmark = pytest.mark.cpu_test
@pytest.fixture(autouse=True)
def _auto_init_hash_fn(request):

View File

@@ -25,6 +25,8 @@ from vllm.v1.core.kv_cache_utils import (BlockHash, BlockHashWithGroupId,
from vllm.v1.kv_cache_interface import (FullAttentionSpec, KVCacheConfig,
KVCacheGroupSpec, SlidingWindowSpec)
pytestmark = pytest.mark.cpu_test
@pytest.fixture(autouse=True)
def _auto_init_hash_fn(request):
@@ -1267,7 +1269,7 @@ def test_kv_cache_events(blocks_to_cache: int):
def test_eagle_enabled_removes_last_block():
-"""Verify Eagle does NOT remove blocks when request
+"""Verify Eagle does NOT remove blocks when request
length is divisible by block size."""
block_size = 16
manager = KVCacheManager(

View File

@@ -23,6 +23,8 @@ from vllm.v1.structured_output.request import StructuredOutputRequest
from .utils import EOS_TOKEN_ID, create_requests, create_scheduler
pytestmark = pytest.mark.cpu_test
def test_add_requests():
scheduler = create_scheduler()

View File

@@ -3,6 +3,7 @@
import random
import pytest
import torch
from vllm.v1.core.block_pool import BlockPool
@@ -13,6 +14,8 @@ from vllm.v1.core.single_type_kv_cache_manager import (
from vllm.v1.kv_cache_interface import (ChunkedLocalAttentionSpec,
SlidingWindowSpec)
pytestmark = pytest.mark.cpu_test
def get_sliding_window_manager(sliding_window_spec, block_pool):
return SlidingWindowManager(sliding_window_spec,

View File

@@ -3,9 +3,13 @@
from concurrent.futures import Future
from typing import Optional
import pytest
from vllm.distributed.kv_transfer.kv_connector.utils import KVOutputAggregator
from vllm.v1.outputs import KVConnectorOutput, ModelRunnerOutput
pytestmark = pytest.mark.cpu_test
class DummyModelRunnerOutput(ModelRunnerOutput):

View File

@@ -2,12 +2,16 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import copy
import pytest
from vllm.v1.outputs import EMPTY_MODEL_RUNNER_OUTPUT, KVConnectorOutput
from vllm.v1.request import FinishReason, RequestStatus
from .utils import (assert_scheduler_empty, create_model_runner_output,
create_request, create_scheduler, create_vllm_config)
pytestmark = pytest.mark.cpu_test
def test_basic_lifecycle():
"""Test lifecycle of a Remote Decode request."""

View File

@@ -2,12 +2,16 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import copy
import pytest
from vllm.v1.outputs import EMPTY_MODEL_RUNNER_OUTPUT, KVConnectorOutput
from vllm.v1.request import FinishReason, RequestStatus
from .utils import (assert_scheduler_empty, create_model_runner_output,
create_request, create_scheduler, create_vllm_config)
pytestmark = pytest.mark.cpu_test
def test_basic_lifecycle():
"""Test lifecycle of a remote prefill."""

View File

@@ -7,6 +7,8 @@ import pytest
from vllm.v1.metrics.reader import (Counter, Gauge, Histogram, Vector,
get_metrics_snapshot)
pytestmark = pytest.mark.cpu_test
@pytest.fixture(autouse=True)
def test_registry(monkeypatch):

View File

@@ -6,6 +6,8 @@ import pytest
from vllm.v1.structured_output.backend_xgrammar import (
has_xgrammar_unsupported_json_features)
pytestmark = pytest.mark.cpu_test
@pytest.fixture
def unsupported_string_schemas():

View File

@@ -16,6 +16,8 @@ from vllm.multimodal.inputs import (MultiModalBatchedField,
MultiModalSharedField, NestedTensors)
from vllm.v1.serial_utils import MsgpackDecoder, MsgpackEncoder
pytestmark = pytest.mark.cpu_test
class UnrecognizedType(UserDict):