[ROCm][CI] Enable hybrid chunked prefill test (#38317)

Signed-off-by: Andreas Karatzas <akaratza@amd.com>
This commit is contained in:
Andreas Karatzas
2026-03-29 21:30:26 -05:00
committed by GitHub
parent d28d86e8a3
commit 4f2ed5fddb
2 changed files with 21 additions and 2 deletions

View File

@@ -36,14 +36,20 @@ MESSAGES = [
 ]
 
 
-@pytest.mark.skipif(not current_platform.is_cuda(), reason="CUDA not available")
 @pytest.mark.parametrize(
     "model_name",
     [
         pytest.param("Qwen/Qwen3.5-4B", marks=[large_gpu_mark(min_gb=40)]),
         pytest.param(
             "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-FP8",
-            marks=[large_gpu_mark(min_gb=80)] + multi_gpu_marks(num_gpus=4),
+            marks=[large_gpu_mark(min_gb=80)]
+            + multi_gpu_marks(num_gpus=4)
+            + [
+                pytest.mark.skipif(
+                    not current_platform.is_cuda(),
+                    reason="modelopt quantization is supported only on CUDA",
+                )
+            ],
         ),
     ],
 )