[Bugfix] fix composite weight loading and EAGLE weight loading (#9160)
This commit is contained in:
@@ -25,11 +25,6 @@ from vllm.model_executor.layers.layernorm import RMSNorm
|
||||
from vllm.model_executor.layers.quantization import QuantizationConfig
|
||||
from vllm.model_executor.layers.sampler import Sampler, SamplerOutput
|
||||
from vllm.model_executor.model_loader.loader import DefaultModelLoader
|
||||
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
|
||||
from vllm.model_executor.models.utils import (flatten_bn,
|
||||
group_weights_with_prefix,
|
||||
init_vllm_registered_model,
|
||||
merge_multimodal_embeddings)
|
||||
from vllm.model_executor.sampling_metadata import SamplingMetadata
|
||||
from vllm.multimodal import MULTIMODAL_REGISTRY
|
||||
from vllm.multimodal.base import MultiModalInputs, NestedTensors
|
||||
@@ -41,6 +36,8 @@ from vllm.transformers_utils.configs.ultravox import UltravoxConfig
|
||||
from vllm.utils import is_list_of
|
||||
|
||||
from .interfaces import SupportsMultiModal, SupportsPP
|
||||
from .utils import (AutoWeightsLoader, WeightsMapper, flatten_bn,
|
||||
init_vllm_registered_model, merge_multimodal_embeddings)
|
||||
|
||||
_AUDIO_PLACEHOLDER_TOKEN = 128002
|
||||
_AUDIO_TOKENS_PER_SECOND = 6.25
|
||||
@@ -498,30 +495,9 @@ class UltravoxModel(nn.Module, SupportsMultiModal, SupportsPP):
|
||||
return self.language_model.sample(logits, sampling_metadata)
|
||||
|
||||
def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
|
||||
# prepare weight iterators for components
|
||||
weights_group = group_weights_with_prefix(weights)
|
||||
hf_to_vllm_mapper = WeightsMapper(
|
||||
orig_to_new_prefix={"audio_tower.model.encoder.": "audio_tower."})
|
||||
|
||||
# load audio tower weights
|
||||
audio_tower_weights = weights_group["audio_tower"]
|
||||
audio_tower_params_dict = dict(
|
||||
self.audio_tower.named_parameters(
|
||||
prefix=self.audio_tower.base_model_prefix))
|
||||
for name, loaded_weight in audio_tower_weights:
|
||||
if name in audio_tower_params_dict:
|
||||
param = audio_tower_params_dict[name]
|
||||
weight_loader = getattr(param, "weight_loader",
|
||||
default_weight_loader)
|
||||
weight_loader(param, loaded_weight)
|
||||
|
||||
# load projector weights
|
||||
projector_weights = weights_group["multi_modal_projector"]
|
||||
projector_params_dict = dict(
|
||||
self.multi_modal_projector.named_parameters())
|
||||
for name, loaded_weight in projector_weights:
|
||||
param = projector_params_dict[name]
|
||||
weight_loader = getattr(param, "weight_loader",
|
||||
default_weight_loader)
|
||||
weight_loader(param, loaded_weight)
|
||||
|
||||
# load llm backbone
|
||||
self.language_model.load_weights(weights_group["language_model"])
|
||||
loader = AutoWeightsLoader(self,
|
||||
ignore_unexpected_prefixes=["audio_tower."])
|
||||
loader.load_weights(weights, mapper=hf_to_vllm_mapper)
|
||||
|
||||
Reference in New Issue
Block a user