[VLM] Use SequenceData.from_token_counts to create dummy data (#8687)

2024-09-21 14:28:56 +08:00
parent 71c60491f2
commit 5e85f4f82a
12 changed files with 74 additions and 81 deletions
--- a/vllm/model_executor/models/clip.py
+++ b/vllm/model_executor/models/clip.py
@@ -1,6 +1,5 @@
 """Minimal implementation of CLIPVisionModel intended to be only used
 within a vision language model."""
-from array import array
 from typing import Iterable, List, Optional, Tuple, Union

 import torch
@@ -20,7 +19,7 @@ from vllm.model_executor.layers.quantization import QuantizationConfig
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 from vllm.multimodal.utils import (cached_get_tokenizer,
                                   repeat_and_pad_placeholder_tokens)
-from vllm.sequence import VLLM_TOKEN_ID_ARRAY_TYPE, SequenceData
+from vllm.sequence import SequenceData

 try:
    from xformers import ops as xops
@@ -62,11 +61,10 @@ def dummy_seq_data_for_clip(
    else:
        image_feature_size = image_feature_size_override

-    token_ids = array(VLLM_TOKEN_ID_ARRAY_TYPE,
-                      [image_token_id]) * image_feature_size * num_images
-    token_ids += array(VLLM_TOKEN_ID_ARRAY_TYPE,
-                       [0]) * (seq_len - image_feature_size * num_images)
-    return SequenceData(token_ids)
+    return SequenceData.from_token_counts(
+        (image_token_id, image_feature_size * num_images),
+        (0, seq_len - image_feature_size * num_images),
+    )


 def dummy_image_for_clip(