diff --git a/vllm/_xpu_ops.py b/vllm/_xpu_ops.py index e40b18f81..1f64aacd4 100644 --- a/vllm/_xpu_ops.py +++ b/vllm/_xpu_ops.py @@ -105,9 +105,10 @@ class xpu_ops: assert len(window_size) == 2 real_window_size = (window_size[0], window_size[1]) # noqa: F841 - # In encode attention, v maybe not contiguous and current + # In encode attention, k and v may not be contiguous and current # kernel can't handle it if block_table is None: + k = k.contiguous() v = v.contiguous() return flash_attn_varlen_func( out=out,