diff --git a/vllm/model_executor/layers/quantization/utils/int8_utils.py b/vllm/model_executor/layers/quantization/utils/int8_utils.py index 020098dff..a98e29ffd 100644 --- a/vllm/model_executor/layers/quantization/utils/int8_utils.py +++ b/vllm/model_executor/layers/quantization/utils/int8_utils.py @@ -88,6 +88,13 @@ if current_platform.is_rocm(): def round_int8(x): return tl.extra.hip.libdevice.round(x).to(tl.int8) + +elif current_platform.is_xpu(): + + @triton.jit + def round_int8(x): + return tl.extra.intel.libdevice.round(x).to(tl.int8) + else: @triton.jit