[CI] Move applicable tests to CPU (#24080)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2025-09-30 09:45:20 -04:00
parent 80608ba5af
commit bc546f76a1
39 changed files with 136 additions and 28 deletions
--- a/tests/v1/core/test_async_scheduler.py
+++ b/tests/v1/core/test_async_scheduler.py
@@ -11,6 +11,8 @@ from vllm.v1.utils import ConstantList

 from .utils import create_requests, create_scheduler

+pytestmark = pytest.mark.cpu_test
+

 def _make_model_runner_output(
    scheduler_output: SchedulerOutput, ) -> ModelRunnerOutput:
--- a/tests/v1/core/test_encoder_cache_manager.py
+++ b/tests/v1/core/test_encoder_cache_manager.py
@@ -1,9 +1,12 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import pytest

 from vllm.multimodal.inputs import MultiModalFeatureSpec, PlaceholderRange
 from vllm.v1.core.encoder_cache_manager import EncoderCacheManager

+pytestmark = pytest.mark.cpu_test
+

 # ------------------ Mock Classes ------------------ #
 class MockRequest:
--- a/tests/v1/core/test_kv_cache_utils.py
+++ b/tests/v1/core/test_kv_cache_utils.py
@@ -32,6 +32,8 @@ from vllm.v1.request import Request

 # yapf: enable

+pytestmark = pytest.mark.cpu_test
+

 @pytest.fixture(autouse=True)
 def _auto_init_hash_fn(request):
--- a/tests/v1/core/test_prefix_caching.py
+++ b/tests/v1/core/test_prefix_caching.py
@@ -25,6 +25,8 @@ from vllm.v1.core.kv_cache_utils import (BlockHash, BlockHashWithGroupId,
 from vllm.v1.kv_cache_interface import (FullAttentionSpec, KVCacheConfig,
                                        KVCacheGroupSpec, SlidingWindowSpec)

+pytestmark = pytest.mark.cpu_test
+

 @pytest.fixture(autouse=True)
 def _auto_init_hash_fn(request):
@@ -1267,7 +1269,7 @@ def test_kv_cache_events(blocks_to_cache: int):


 def test_eagle_enabled_removes_last_block():
-    """Verify Eagle does NOT remove blocks when request 
+    """Verify Eagle does NOT remove blocks when request
    length is divisible by block size."""
    block_size = 16
    manager = KVCacheManager(
--- a/tests/v1/core/test_scheduler.py
+++ b/tests/v1/core/test_scheduler.py
@@ -23,6 +23,8 @@ from vllm.v1.structured_output.request import StructuredOutputRequest

 from .utils import EOS_TOKEN_ID, create_requests, create_scheduler

+pytestmark = pytest.mark.cpu_test
+

 def test_add_requests():
    scheduler = create_scheduler()
--- a/tests/v1/core/test_single_type_kv_cache_manager.py
+++ b/tests/v1/core/test_single_type_kv_cache_manager.py
@@ -3,6 +3,7 @@

 import random

+import pytest
 import torch

 from vllm.v1.core.block_pool import BlockPool
@@ -13,6 +14,8 @@ from vllm.v1.core.single_type_kv_cache_manager import (
 from vllm.v1.kv_cache_interface import (ChunkedLocalAttentionSpec,
                                        SlidingWindowSpec)

+pytestmark = pytest.mark.cpu_test
+

 def get_sliding_window_manager(sliding_window_spec, block_pool):
    return SlidingWindowManager(sliding_window_spec,