[Quantization] Enable BNB support for InternS1 (#21953)
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
"""Utils for model executor."""
|
||||
|
||||
import copy
|
||||
from typing import Any, Optional
|
||||
|
||||
@@ -9,6 +10,7 @@ import torch
|
||||
|
||||
def set_random_seed(seed: int) -> None:
    """Seed all random number generators used by vLLM.

    Args:
        seed: Integer seed forwarded to the current platform's
            ``seed_everything`` helper, which seeds the platform's RNGs.
    """
    # Imported lazily to avoid a module-level dependency on the platform
    # detection logic at import time.
    from vllm.platforms import current_platform

    current_platform.seed_everything(seed)
|
||||
|
||||
|
||||
@@ -29,7 +31,7 @@ def set_weight_attrs(
|
||||
return
|
||||
for key, value in weight_attrs.items():
|
||||
assert not hasattr(
|
||||
weight, key), (f"Overwriting existing tensor attribute: {key}")
|
||||
weight, key), f"Overwriting existing tensor attribute: {key}"
|
||||
|
||||
# NOTE(woosuk): During weight loading, we often do something like:
|
||||
# narrowed_tensor = param.data.narrow(0, offset, len)
|
||||
@@ -41,6 +43,7 @@ def set_weight_attrs(
|
||||
# we sync the param tensor after its weight loader is called.
|
||||
# TODO(woosuk): Remove this hack once we have a better solution.
|
||||
from vllm.platforms import current_platform
|
||||
|
||||
if current_platform.is_tpu() and key == "weight_loader":
|
||||
value = _make_synced_weight_loader(value)
|
||||
setattr(weight, key, value)
|
||||
@@ -77,4 +80,17 @@ def get_packed_modules_mapping(model: torch.nn.Module) -> dict[str, list[str]]:
|
||||
f"safely because of conflicts from {type(child).__name__}.")
|
||||
else:
|
||||
parent_map.update(child_map)
|
||||
return parent_map
|
||||
return parent_map
|
||||
|
||||
|
||||
def get_moe_expert_mapping(
    model: torch.nn.Module, ) -> list[tuple[str, str, int, str]]:
    """Return the MoE expert mapping exposed by *model*, if any.

    The model itself is checked first for a ``get_expert_mapping``
    callable; failing that, each direct child is probed in order and the
    first match wins. An empty list is returned when no component
    provides a mapping.
    """
    mapping_fn = getattr(model, "get_expert_mapping", None)
    if mapping_fn:
        return mapping_fn()
    # We only check main components instead of whole model submodules
    for component in model.children():
        component_fn = getattr(component, "get_expert_mapping", None)
        if component_fn is not None:
            return component_fn()
    return []
|
||||
|
||||
Reference in New Issue
Block a user