[Model] Refactor BLIP/BLIP-2 to support composite model loading (#8407)
This commit is contained in:
@@ -20,7 +20,6 @@ from vllm.config import CacheConfig, MultiModalConfig
|
||||
from vllm.inputs import INPUT_REGISTRY
|
||||
from vllm.inputs.data import LLMInputs
|
||||
from vllm.inputs.registry import InputContext
|
||||
from vllm.logger import init_logger
|
||||
from vllm.model_executor.layers.activation import SiluAndMul, get_act_fn
|
||||
from vllm.model_executor.layers.layernorm import RMSNorm
|
||||
from vllm.model_executor.layers.quantization.base_config import (
|
||||
@@ -43,8 +42,6 @@ from vllm.transformers_utils.configs.ultravox import UltravoxConfig
|
||||
_AUDIO_PLACEHOLDER_TOKEN = 128002
|
||||
_AUDIO_TOKENS_PER_SECOND = 6.25
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
class UltravoxAudioFeatureInputs(TypedDict):
|
||||
type: Literal["audio_features"]
|
||||
|
||||
Reference in New Issue
Block a user