[Misc] Minor fix in KVCache type (#3652)

2024-03-26 23:14:06 -07:00
parent 76879342a3
commit e66b629c04
3 changed files with 4 additions and 8 deletions
--- a/vllm/model_executor/models/llava.py
+++ b/vllm/model_executor/models/llava.py
@@ -1,4 +1,4 @@
-from typing import List, Optional, Tuple
+from typing import List, Optional

 import torch
 from torch import nn
@@ -19,8 +19,6 @@ from vllm.model_executor.weight_utils import (default_weight_loader,
                                              hf_model_weights_iterator)
 from vllm.sequence import SamplerOutput

-KVCache = Tuple[torch.Tensor, torch.Tensor]
-
 _KEYS_TO_MODIFY_MAPPING = {
    "language_model.lm_head": "lm_head",
    "language_model.model": "language_model",
@@ -102,7 +100,7 @@ class LlavaForConditionalGeneration(nn.Module):
        self,
        input_ids: torch.Tensor,
        positions: torch.Tensor,
-        kv_caches: List[KVCache],
+        kv_caches: List[torch.Tensor],
        attn_metadata: AttentionMetadata,
        image_input: Optional[torch.Tensor] = None
    ) -> SamplerOutput:  # noqa: E501