[V0 deprecation] Remove V0 HPU backend (#21131)

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
2025-07-17 16:37:36 -07:00
parent ac9fb732a5
commit 4de7146351
27 changed files with 10 additions and 3926 deletions
--- a/vllm/model_executor/model_loader/bitsandbytes_loader.py
+++ b/vllm/model_executor/model_loader/bitsandbytes_loader.py
@@ -199,10 +199,6 @@ class BitsAndBytesModelLoader(BaseModelLoader):

        if self.pre_quant:
            if self.load_8bit:
-                if current_platform.is_hpu():
-                    raise ValueError(
-                        "currently hpu supports 4bit quantization only")
-
                return self._quantized_8bit_generator(
                    hf_weights_files, use_safetensors,
                    quant_state_dict), quant_state_dict
@@ -306,10 +302,6 @@ class BitsAndBytesModelLoader(BaseModelLoader):
                        in temp_state_dict):
                quant_state = _parse_quant_state(mapped_weight_name,
                                                 temp_state_dict)
-                if current_platform.is_hpu():
-                    assert quant_state.quant_type == "nf4", (
-                        "currently hpu supports nf4 quant_type only")
-
                quant_state_dict[mapped_weight_name] = quant_state
                yield org_weight_name, weight_tensor
            else:
@@ -380,8 +372,7 @@ class BitsAndBytesModelLoader(BaseModelLoader):
                                                      ...]

                # bitsandbytes requires data in GPU
-                if (weight_sub_tensor.is_cuda
-                        or weight_sub_tensor.device.type == "hpu"):
+                if weight_sub_tensor.is_cuda:
                    loaded_weight = weight_sub_tensor
                else:
                    loaded_weight = weight_sub_tensor.to(