[V1] [Hybrid] Validate compatibility of attention backend batch reordering at init time (#21557)
Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com>
This commit is contained in:
@@ -251,9 +251,6 @@ class AiterFlashAttentionMetadataBuilder(
|
||||
self.aot_sliding_window: Optional[tuple[int, int]] = None
|
||||
self.total_tokens: int = 0
|
||||
|
||||
def reorder_batch(self, input_batch, scheduler_output) -> bool:
|
||||
return False
|
||||
|
||||
def build_for_cudagraph_capture(
|
||||
self, common_attn_metadata: CommonAttentionMetadata):
|
||||
self.total_tokens = self.model_config.max_model_len \
|
||||
|
||||
Reference in New Issue
Block a user