[UT][Intel GPU] Use current_platform instead of a hard-coded device in v1 tests (#20169)
Signed-off-by: Ma, Liangliang <liangliang.ma@intel.com>
This commit is contained in:
@@ -10,6 +10,7 @@ from vllm.config import (CacheConfig, DeviceConfig, LoadConfig, ModelConfig,
                          ParallelConfig, SchedulerConfig, SpeculativeConfig,
                          VllmConfig)
 from vllm.model_executor.models.llama import LlamaForCausalLM
+from vllm.platforms import current_platform
 from vllm.v1.spec_decode.eagle import EagleProposer
 
 model_dir = "meta-llama/Llama-3.1-8B-Instruct"
@@ -38,15 +39,17 @@ def _create_proposer(method: str, k: int) -> EagleProposer:
         num_speculative_tokens=k,
     )
 
-    vllm_config = VllmConfig(model_config=model_config,
-                             cache_config=CacheConfig(),
-                             speculative_config=speculative_config,
-                             device_config=DeviceConfig(device="cuda"),
-                             parallel_config=ParallelConfig(),
-                             load_config=LoadConfig(),
-                             scheduler_config=SchedulerConfig())
+    vllm_config = VllmConfig(
+        model_config=model_config,
+        cache_config=CacheConfig(),
+        speculative_config=speculative_config,
+        device_config=DeviceConfig(device=current_platform.device_type),
+        parallel_config=ParallelConfig(),
+        load_config=LoadConfig(),
+        scheduler_config=SchedulerConfig())
 
-    return EagleProposer(vllm_config=vllm_config, device='cuda')
+    return EagleProposer(vllm_config=vllm_config,
+                         device=current_platform.device_type)
 
 
 def test_prepare_inputs():
@@ -59,7 +62,7 @@ def test_prepare_inputs():
                     a, a + 1, ..., a + b - n2 - 1,
                     a + b, a + b + 1, ..., a + b + c - n3 - 1]
     """
-    device = torch.device('cuda')
+    device = torch.device(current_platform.device_type)
 
     # a = 4, b = 7, c = 5
     # n1 = 1, n2 = 3, n3 = 2
@@ -198,7 +201,7 @@ def test_load_model(mock_get_model, mock_get_layers, mock_get_pp_group, method,
 @pytest.mark.parametrize("num_speculative_tokens", [1, 3, 8])
 def test_propose(num_speculative_tokens):
     # Use GPU device
-    device = torch.device('cuda')
+    device = torch.device(current_platform.device_type)
 
     # Setup test parameters
     batch_size = 2
Reference in New Issue
Block a user