[Core] Optimize SPMD architecture with delta + serialization optimization (#7109)
This commit is contained in:
@@ -23,6 +23,7 @@
|
||||
"""Inference-only MiniCPM-V model compatible with HuggingFace weights."""
|
||||
import math
|
||||
import re
|
||||
from array import array
|
||||
from functools import partial
|
||||
from typing import (Any, Callable, Iterable, List, Mapping, Optional, Tuple,
|
||||
TypedDict, Union)
|
||||
@@ -55,7 +56,8 @@ from vllm.model_executor.sampling_metadata import SamplingMetadata
|
||||
from vllm.multimodal import MULTIMODAL_REGISTRY
|
||||
from vllm.multimodal.image import (cached_get_image_processor,
|
||||
cached_get_tokenizer)
|
||||
from vllm.sequence import IntermediateTensors, SamplerOutput, SequenceData
|
||||
from vllm.sequence import (VLLM_TOKEN_ID_ARRAY_TYPE, IntermediateTensors,
|
||||
SamplerOutput, SequenceData)
|
||||
|
||||
from .idefics2_vision_model import Idefics2VisionTransformer
|
||||
|
||||
@@ -408,7 +410,7 @@ def get_max_minicpmv_image_tokens(ctx: InputContext):
|
||||
|
||||
|
||||
def dummy_seq_data_for_minicpmv(seq_len: int, num_images: int):
    """Build placeholder sequence data consisting of ``seq_len`` zero tokens.

    The token ids are stored in a compact ``array`` (typecode
    ``VLLM_TOKEN_ID_ARRAY_TYPE``) rather than a Python ``list``, which is
    the representation passed to ``SequenceData`` here.

    Args:
        seq_len: Number of zero-valued token ids to generate.
        num_images: Not used by this function's body; kept so the
            signature stays compatible with existing callers.

    Returns:
        A ``SequenceData`` wrapping the zero-filled token id array.
    """
    # Repeat a one-element array instead of building a list first; the
    # superseded ``[0] * seq_len`` assignment was dead code and is dropped.
    token_ids = array(VLLM_TOKEN_ID_ARRAY_TYPE, [0]) * seq_len
    return SequenceData(token_ids)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user