[Hardware][Intel GPU] Upgrade to torch 2.7 (#17444)

Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
Co-authored-by: Qiming Zhang <qiming1.zhang@intel.com>
2025-04-30 15:03:58 +08:00
parent 6ed9f6047e
commit ed6cfb90c8
5 changed files with 18 additions and 35 deletions
--- a/vllm/_ipex_ops.py
+++ b/vllm/_ipex_ops.py
@@ -177,6 +177,7 @@ class ipex_ops:
        out: torch.Tensor,
        seqlen_q: torch.Tensor,
        seqlen_k: torch.Tensor,
+        alibi_slopes: torch.Tensor,
        max_seqlen_q: int,
        max_seqlen_k: int,
        pdropout: float,
@@ -185,6 +186,8 @@ class ipex_ops:
        is_causal: bool,
        return_softmax: bool,
        gen_: torch.Generator,
+        window_size_left: float,
+        window_size_right: float,
        logits_soft_cap: float,
    ) -> None:
        if ipex.__version__.endswith("cpu"):
@@ -200,15 +203,12 @@ class ipex_ops:
                                                 is_causal, return_softmax,
                                                 gen_)
        else:  # XPU build
-            ipex.llm.functional.varlen_attention(query.contiguous(),
-                                                 key.contiguous(),
-                                                 value.contiguous(), out,
-                                                 seqlen_q.int(),
-                                                 seqlen_k.int(), max_seqlen_q,
-                                                 max_seqlen_k, pdropout,
-                                                 softmax_scale, zero_tensors,
-                                                 is_causal, return_softmax,
-                                                 gen_, logits_soft_cap)
+            ipex.llm.functional.varlen_attention(
+                query.contiguous(), key.contiguous(), value.contiguous(), out,
+                seqlen_q.int(), seqlen_k.int(), alibi_slopes, max_seqlen_q,
+                max_seqlen_k, pdropout, softmax_scale, zero_tensors, is_causal,
+                return_softmax, gen_, window_size_left, window_size_right,
+                logits_soft_cap)

    @staticmethod
    def reshape_and_cache(