[V1] Remove num_input_tokens from attn_metadata (#17193)

Signed-off-by: Chen Zhang <zhangch99@outlook.com>
2025-04-30 00:28:41 +08:00
parent 2ef5d106bb
commit 24e6ad3f16
6 changed files with 14 additions and 21 deletions
--- a/vllm/v1/attention/backends/flashinfer.py
+++ b/vllm/v1/attention/backends/flashinfer.py
@@ -183,9 +183,6 @@ class FlashInferMetadata:
    decode_wrapper: Optional[BatchDecodeWithPagedKVCacheWrapper] = None
    cascade_wrapper: Optional[MultiLevelCascadeAttentionWrapper] = None

-    # For logging.
-    num_input_tokens: int = 0  # Number of tokens including padding.
-
    @property
    def query_start_loc(self):
        # The GPUModelRunner expects to be able to access this property.