Refactor hard-coded device strings in test files under tests/v1 and tests/lora (#37566)
Signed-off-by: Liao, Wei <wei.liao@intel.com>
This commit is contained in:
@@ -22,10 +22,8 @@ from vllm.v1.worker.gpu_input_batch import CachedRequestState, InputBatch
|
||||
VOCAB_SIZE = 1024
|
||||
NUM_OUTPUT_TOKENS = 20
|
||||
MAX_PROMPT_SIZE = 100
|
||||
CUDA_DEVICES = [
|
||||
f"{current_platform.device_type}:{i}"
|
||||
for i in range(min(current_platform.device_count(), 2))
|
||||
]
|
||||
DEVICE_TYPE = current_platform.device_type
|
||||
DEVICES = [f"{DEVICE_TYPE}:{i}" for i in range(min(current_platform.device_count(), 2))]
|
||||
MAX_NUM_PROMPT_TOKENS = 64
|
||||
|
||||
|
||||
@@ -219,7 +217,7 @@ def _construct_cached_request_state(req_id_suffix: int):
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("device", CUDA_DEVICES)
|
||||
@pytest.mark.parametrize("device", DEVICES)
|
||||
@pytest.mark.parametrize("batch_size", [1, 2, 32, 64])
|
||||
def test_sampling_metadata_in_input_batch(device: str, batch_size: int):
|
||||
"""
|
||||
@@ -313,7 +311,7 @@ def test_sampling_metadata_in_input_batch(device: str, batch_size: int):
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("device", CUDA_DEVICES)
|
||||
@pytest.mark.parametrize("device", DEVICES)
|
||||
@pytest.mark.parametrize("batch_size", [32])
|
||||
@pytest.mark.parametrize("swap_list", [((0, 1),)])
|
||||
def test_swap_states_in_input_batch(device: str, batch_size: int, swap_list: list):
|
||||
@@ -400,7 +398,7 @@ def _construct_pooling_request(req_id_suffix: int, pooling_params=None):
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("device", CUDA_DEVICES)
|
||||
@pytest.mark.parametrize("device", DEVICES)
|
||||
def test_pooling_prompt_lens_not_aliased(device: str):
|
||||
"""Verify that prompt_lens in PoolingMetadata does not share memory
|
||||
with the internal num_prompt_tokens pinned buffer. Guards against possible
|
||||
|
||||
Reference in New Issue
Block a user