[Quant] Add SupportsQuant to phi3 and clip (#13104)
This commit is contained in:
@@ -50,7 +50,7 @@ from vllm.sequence import IntermediateTensors
|
||||
from vllm.utils import is_list_of
|
||||
|
||||
from .clip import CLIPVisionModel
|
||||
from .interfaces import SupportsMultiModal, SupportsPP
|
||||
from .interfaces import SupportsMultiModal, SupportsPP, SupportsQuant
|
||||
from .utils import (AutoWeightsLoader, WeightsMapper, flatten_bn,
|
||||
init_vllm_registered_model, maybe_prefix,
|
||||
merge_multimodal_embeddings)
|
||||
@@ -498,7 +498,8 @@ class Phi3VMultiModalProcessor(BaseMultiModalProcessor[Phi3VProcessingInfo]):
|
||||
@MULTIMODAL_REGISTRY.register_processor(Phi3VMultiModalProcessor,
|
||||
info=Phi3VProcessingInfo,
|
||||
dummy_inputs=Phi3VDummyInputsBuilder)
|
||||
class Phi3VForCausalLM(nn.Module, SupportsMultiModal, SupportsPP):
|
||||
class Phi3VForCausalLM(nn.Module, SupportsMultiModal, SupportsPP,
|
||||
SupportsQuant):
|
||||
hf_to_vllm_mapper = WeightsMapper(
|
||||
orig_to_new_prefix={
|
||||
"model.vision_embed_tokens.wte": "embed_tokens",
|
||||
@@ -510,7 +511,6 @@ class Phi3VForCausalLM(nn.Module, SupportsMultiModal, SupportsPP):
|
||||
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
|
||||
super().__init__()
|
||||
config = vllm_config.model_config.hf_config
|
||||
quant_config = vllm_config.quant_config
|
||||
multimodal_config = vllm_config.model_config.multimodal_config
|
||||
self.config = config
|
||||
self.multimodal_config = multimodal_config
|
||||
@@ -520,14 +520,14 @@ class Phi3VForCausalLM(nn.Module, SupportsMultiModal, SupportsPP):
|
||||
config.vocab_size,
|
||||
config.hidden_size,
|
||||
org_num_embeddings=config.vocab_size,
|
||||
quant_config=quant_config,
|
||||
quant_config=self.quant_config,
|
||||
prefix=maybe_prefix(prefix, "model.embed_tokens"),
|
||||
)
|
||||
|
||||
# TODO: Optionally initializes this for supporting input embeddings.
|
||||
self.vision_embed_tokens = Phi3HDImageEmbedding(
|
||||
config,
|
||||
quant_config,
|
||||
self.quant_config,
|
||||
prefix=maybe_prefix(prefix, "model.vision_embed_tokens"))
|
||||
|
||||
self.language_model = init_vllm_registered_model(
|
||||
|
||||
Reference in New Issue
Block a user