Enable bitsandbytes quantization on AMD GPUs that use warp size 32 (#27307)

Signed-off-by: sstamenk <strahinja.stamenkovic@amd.com>
2025-11-19 04:12:31 +01:00
parent 20852c8f4c
commit 814843e021
2 changed files with 10 additions and 4 deletions
--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -185,6 +185,9 @@ class RocmPlatform(Platform):
        "petit_nvfp4",
        "torchao",
    ]
+    # bitsandbytes is not supported on gfx9 (warp size 64); enable it elsewhere
+    if not on_gfx9():
+        supported_quantization += ["bitsandbytes"]

    @classmethod
    def get_vit_attn_backend(