[ROCm][CI] Add large_gpu_mark to test_max_tokens_none for ROCm (#37717)
Signed-off-by: Andreas Karatzas <akaratza@amd.com>
This commit is contained in:
@@ -12,6 +12,7 @@ import gc
|
||||
import pytest
|
||||
import torch
|
||||
|
||||
from tests.utils import large_gpu_mark
|
||||
from vllm import LLM, SamplingParams
|
||||
from vllm.platforms import current_platform
|
||||
|
||||
@@ -32,10 +33,21 @@ def test_duplicated_ignored_sequence_group():
|
||||
assert len(prompts) == len(outputs)
|
||||
|
||||
|
||||
def test_max_tokens_none():
|
||||
@pytest.mark.parametrize(
|
||||
"model",
|
||||
[
|
||||
pytest.param(
|
||||
"distilbert/distilgpt2",
|
||||
marks=[
|
||||
*([large_gpu_mark(min_gb=80)] if current_platform.is_rocm() else []),
|
||||
],
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_max_tokens_none(model):
|
||||
sampling_params = SamplingParams(temperature=0.01, top_p=0.1, max_tokens=None)
|
||||
llm = LLM(
|
||||
model="distilbert/distilgpt2",
|
||||
model=model,
|
||||
max_num_batched_tokens=4096,
|
||||
tensor_parallel_size=1,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user