[V1] Remove num_input_tokens from attn_metadata (#17193)

Signed-off-by: Chen Zhang <zhangch99@outlook.com>
This commit is contained in:
Chen Zhang
2025-04-30 00:28:41 +08:00
committed by GitHub
parent 2ef5d106bb
commit 24e6ad3f16
6 changed files with 14 additions and 21 deletions

View File

@@ -183,9 +183,6 @@ class FlashInferMetadata:
decode_wrapper: Optional[BatchDecodeWithPagedKVCacheWrapper] = None
cascade_wrapper: Optional[MultiLevelCascadeAttentionWrapper] = None
# For logging.
num_input_tokens: int = 0 # Number of tokens including padding.
@property
def query_start_loc(self):
# The GPUModelRunner expects to be able to access this property.