[ROCm] Enable bitsandbytes quantization support on ROCm (#34688)
Signed-off-by: badaoui <abdennacerbadaoui0@gmail.com>
parent 2aab2bb543
commit 8dc8a99b56
@@ -6,8 +6,6 @@ from typing import Any
 
 import pytest
 
-from vllm.platforms import current_platform
-
 from ..conftest import HfRunner, VllmRunner
 from ..utils import multi_gpu_test, prep_prompts
 from .registry import HF_EXAMPLE_MODELS
@@ -131,6 +129,7 @@ def test_distributed(
                 "quantization": "bitsandbytes",
             },
         ),
+        ("unsloth/tinyllama-bnb-4bit", {}),
     ],
 )
 @pytest.mark.parametrize("max_tokens", [32])
@@ -143,12 +142,6 @@ def test_quantization(
     max_tokens: int,
     num_logprobs: int,
 ) -> None:
-    if (
-        current_platform.is_rocm()
-        and quantization_kwargs.get("quantization", "") == "bitsandbytes"
-    ):
-        pytest.skip("bitsandbytes quantization is currently not supported in rocm.")
-
     with vllm_runner(
         model,
         model_impl="auto",
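For context, a minimal usage sketch of what this change enables; it is not part of the commit itself. It loads a bitsandbytes-quantized checkpoint through vLLM's standard quantization="bitsandbytes" path, which this commit stops skipping on ROCm. The model name is taken from the test's parametrize list; the prompt and token budget are illustrative.

# Sketch only: assumes a ROCm (or CUDA) build of vLLM with bitsandbytes
# installed. With this commit, the ROCm skip is gone, so this is expected
# to behave the same on ROCm as on CUDA.
from vllm import LLM, SamplingParams

# Pre-quantized bnb-4bit model, as used in the test's parametrize list.
llm = LLM(model="unsloth/tinyllama-bnb-4bit", quantization="bitsandbytes")

# max_tokens=32 mirrors the test's parametrized token budget.
outputs = llm.generate(["The capital of France is"], SamplingParams(max_tokens=32))
print(outputs[0].outputs[0].text)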