[Quantization][V1] BitsAndBytes support V1 (#15611)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
Jee Jee Li
2025-03-28 10:12:47 +08:00
committed by GitHub
parent bd45912b99
commit 726efc6a32
7 changed files with 52 additions and 24 deletions

View File

@@ -1259,6 +1259,8 @@ class BitsAndBytesModelLoader(BaseModelLoader):
pack_ratio)
offsets = np.concatenate(([0], np.cumsum(num_elements)))
# Store the offsets as a CPU torch.Tensor instead of a NumPy array to make torch infer_schema happy
offsets = torch.tensor(offsets).cpu()
set_weight_attrs(param, {"bnb_shard_offsets": offsets})
if load_8bit: