support bitsandbytes quantization with more models (#9148)
This commit is contained in:
@@ -391,6 +391,17 @@ class FalconModel(nn.Module):
|
||||
|
||||
class FalconForCausalLM(nn.Module, SupportsPP):
|
||||
|
||||
    # BitsandBytes-specific attributes
|
||||
bitsandbytes_stacked_params_mapping = {}
|
||||
default_bitsandbytes_target_modules = [
|
||||
".query_key_value.",
|
||||
".dense.",
|
||||
".dense_h_to_4h.",
|
||||
".dense_4h_to_h.",
|
||||
]
|
||||
# in TP, these weights are partitioned along the column dimension (dim=-1)
|
||||
column_parallel_weights_modules = [".dense_4h_to_h.", ".dense."]
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
config: FalconConfig,
|
||||
|
||||
Reference in New Issue
Block a user