[Attention] Clean up iRoPE in V1 (#21188)

Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
commit 304dce7ec0 (parent 6ece16c4fe)
Author: Lucas Wilkinson
Committed: 2025-07-21 12:10:30 -04:00 (via GitHub)
9 changed files with 14 additions and 26 deletions

@@ -337,7 +337,6 @@ class AiterFlashAttentionImpl(AttentionImpl):
         logits_soft_cap: Optional[float] = None,
         attn_type: AttentionType = AttentionType.DECODER,
         kv_sharing_target_layer_name: Optional[int] = None,
-        use_irope: bool = False,
     ) -> None:
         self.num_heads = num_heads
         self.head_size = head_size
@@ -367,7 +366,6 @@ class AiterFlashAttentionImpl(AttentionImpl):
                 "encoder/decoder cross-attention "
                 "are not implemented for "
                 "FlashAttentionImpl")
-        self.use_irope = use_irope
         if is_quantized_kv_cache(self.kv_cache_dtype):
             raise NotImplementedError(
                 "AiterFlashAttention does not support fp8 kv-cache on this "