[V1] [Hybrid] Validate compatibility of attention backend batch reordering at init time (#21557)

Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com>
This commit is contained in:
Thomas Parnell
2025-08-02 14:29:40 +02:00
committed by GitHub
parent f5d0f4784f
commit 4abfd8796f
7 changed files with 96 additions and 72 deletions

View File

@@ -251,9 +251,6 @@ class AiterFlashAttentionMetadataBuilder(
self.aot_sliding_window: Optional[tuple[int, int]] = None
self.total_tokens: int = 0
def reorder_batch(self, input_batch, scheduler_output) -> bool:
    """Leave the batch as it is and report ``False``.

    Neither ``input_batch`` nor ``scheduler_output`` is read or modified;
    both are accepted only to satisfy the method signature.

    NOTE(review): ``False`` presumably tells the caller that no
    reordering took place -- confirm against the caller.

    Returns:
        Always ``False``.
    """
    batch_was_reordered = False
    return batch_was_reordered
def build_for_cudagraph_capture(
self, common_attn_metadata: CommonAttentionMetadata):
self.total_tokens = self.model_config.max_model_len \