[Model] Refactor BLIP/BLIP-2 to support composite model loading (#8407)

This commit is contained in:
Cyrus Leung
2024-09-22 20:24:21 +08:00
committed by GitHub
parent 0e40ac9b7b
commit 06ed2815e2
10 changed files with 112 additions and 113 deletions

View File

@@ -28,7 +28,6 @@ from transformers import FuyuConfig, FuyuImageProcessor
from vllm.attention import AttentionMetadata
from vllm.config import CacheConfig, MultiModalConfig
from vllm.inputs import INPUT_REGISTRY, InputContext, LLMInputs
from vllm.logger import init_logger
from vllm.model_executor.layers.linear import ColumnParallelLinear
from vllm.model_executor.layers.quantization import QuantizationConfig
from vllm.model_executor.layers.sampler import SamplerOutput
@@ -45,8 +44,6 @@ from vllm.sequence import (VLLM_TOKEN_ID_ARRAY_TYPE, IntermediateTensors,
from .interfaces import SupportsMultiModal
from .utils import merge_multimodal_embeddings
logger = init_logger(__name__)
# The two token IDs below are hard-coded because they could not be found
# in the HF config.
_IMAGE_TOKEN_ID = 71011
_NEWLINE_TOKEN_ID = 71019