support bitsandbytes quantization with more models (#9148)

This commit is contained in:
chenqianfzh
2024-10-08 18:52:19 -07:00
committed by GitHub
parent 9ba0bd6aa6
commit 2f4117c38e
10 changed files with 165 additions and 28 deletions

View File

@@ -391,6 +391,17 @@ class FalconModel(nn.Module):
class FalconForCausalLM(nn.Module, SupportsPP):
# BitsAndBytes-specific attributes
bitsandbytes_stacked_params_mapping = {}
default_bitsandbytes_target_modules = [
".query_key_value.",
".dense.",
".dense_h_to_4h.",
".dense_4h_to_h.",
]
# Under tensor parallelism (TP), these weights are partitioned along the column dimension (dim=-1)
column_parallel_weights_modules = [".dense_4h_to_h.", ".dense."]
def __init__(
self,
config: FalconConfig,