[Quantization] Enable BNB support for InternS1 (#21953)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
Jee Jee Li
2025-08-01 19:09:54 +08:00
committed by GitHub
parent 4931486988
commit 28b18cc741
2 changed files with 43 additions and 16 deletions

View File

@@ -1,6 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Utils for model executor."""
import copy
from typing import Any, Optional
@@ -9,6 +10,7 @@ import torch
def set_random_seed(seed: int) -> None:
from vllm.platforms import current_platform
current_platform.seed_everything(seed)
@@ -29,7 +31,7 @@ def set_weight_attrs(
return
for key, value in weight_attrs.items():
assert not hasattr(
weight, key), (f"Overwriting existing tensor attribute: {key}")
weight, key), f"Overwriting existing tensor attribute: {key}"
# NOTE(woosuk): During weight loading, we often do something like:
# narrowed_tensor = param.data.narrow(0, offset, len)
@@ -41,6 +43,7 @@ def set_weight_attrs(
# we sync the param tensor after its weight loader is called.
# TODO(woosuk): Remove this hack once we have a better solution.
from vllm.platforms import current_platform
if current_platform.is_tpu() and key == "weight_loader":
value = _make_synced_weight_loader(value)
setattr(weight, key, value)
@@ -77,4 +80,17 @@ def get_packed_modules_mapping(model: torch.nn.Module) -> dict[str, list[str]]:
f"safely because of conflicts from {type(child).__name__}.")
else:
parent_map.update(child_map)
return parent_map
return parent_map
def get_moe_expert_mapping(
        model: torch.nn.Module, ) -> list[tuple[str, str, int, str]]:
    """Return the MoE expert mapping exposed by *model*, if any.

    The model's own ``get_expert_mapping`` hook takes precedence when
    present. Otherwise the direct children (main components only, not
    the full submodule tree) are scanned and the first child providing
    the hook wins. An empty list signals that no mapping was found.
    """
    own_hook = getattr(model, "get_expert_mapping", None)
    if own_hook:
        return own_hook()
    # Fall back to the top-level components; we deliberately do not
    # recurse into the whole submodule hierarchy.
    for component in model.children():
        child_hook = getattr(component, "get_expert_mapping", None)
        if child_hook is not None:
            return child_hook()
    return []