[Hardware][TPU][V1] Multi-LoRA implementation for the V1 TPU backend (#14238)
Signed-off-by: Akshat Tripathi <akshat@krai.ai>
Signed-off-by: Chengji Yao <chengjiyao@google.com>
Co-authored-by: Chengji Yao <chengjiyao@google.com>
@@ -2694,8 +2694,8 @@ class LoRAConfig:
     lora_extra_vocab_size: int = 256
     """Maximum size of extra vocabulary that can be present in a LoRA adapter
     (added to the base model vocabulary)."""
-    # This is a constant.
-    lora_vocab_padding_size: ClassVar[int] = 256
+    lora_vocab_padding_size: ClassVar[int] = current_platform\
+        .get_lora_vocab_padding_size()
     long_lora_scaling_factors: Optional[tuple[float, ...]] = None
     """Specify multiple scaling factors (which can be different from base model
     scaling factor - see eg. Long LoRA) to allow for multiple LoRA adapters
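The padding size stops being a hard-coded constant and is instead queried from the active platform, which is what lets the TPU backend choose a value suited to its kernels. Below is a minimal sketch of such a hook, assuming a base platform class that keeps the old default of 256 and a TPU subclass that overrides it; the method name matches the diff, but the class bodies and the TPU value are illustrative assumptions, not vLLM's actual platform code:

# Sketch only: mirrors the hook called in the diff above; the bodies are
# illustrative assumptions, not the real vllm/platforms implementation.
class Platform:
    @classmethod
    def get_lora_vocab_padding_size(cls) -> int:
        # Generic fallback keeps the previous hard-coded default.
        return 256


class TpuPlatform(Platform):
    @classmethod
    def get_lora_vocab_padding_size(cls) -> int:
        # Hypothetical TPU-specific value; the real one lives in the
        # TPU platform class.
        return 1


# `current_platform` would be resolved once, at import time, from the
# detected hardware; the ClassVar in LoRAConfig then picks up whichever
# value the active platform reports.
current_platform = TpuPlatform
lora_vocab_padding_size = current_platform.get_lora_vocab_padding_size()

Routing the value through a classmethod keeps LoRAConfig itself hardware-agnostic: the config never needs to know which backend it is running on, only what padding the platform layer reports.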
@@ -2723,6 +2723,7 @@ class LoRAConfig:
         factors.append(self.fully_sharded_loras)
         factors.append(self.lora_dtype)
         factors.append(self.lora_extra_vocab_size)
+        factors.append(self.lora_vocab_padding_size)
         factors.append(self.long_lora_scaling_factors)
         factors.append(self.bias_enabled)
         hash_str = hashlib.md5(str(factors).encode(),
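Since the padding size can now vary by platform, it must also feed into the config hash, or two deployments with different padding could reuse each other's cached compilation artifacts. A runnable sketch of the factor-hashing pattern this hunk extends; the field defaults are simplified stand-ins, and the md5 call, truncated at the hunk boundary above, is completed here with hashlib's standard usedforsecurity flag as an assumption:

# Sketch of the factor-hashing pattern; fields are simplified stand-ins
# for the real LoRAConfig attributes named in the hunk above.
import hashlib
from dataclasses import dataclass
from typing import Optional


@dataclass
class LoRAConfigSketch:
    fully_sharded_loras: bool = False
    lora_dtype: Optional[str] = None
    lora_extra_vocab_size: int = 256
    lora_vocab_padding_size: int = 256  # platform-dependent after this change
    long_lora_scaling_factors: Optional[tuple] = None
    bias_enabled: bool = False

    def compute_hash(self) -> str:
        factors: list = []
        factors.append(self.fully_sharded_loras)
        factors.append(self.lora_dtype)
        factors.append(self.lora_extra_vocab_size)
        # Newly hashed: a platform-specific padding size must invalidate
        # the cache when it differs between builds.
        factors.append(self.lora_vocab_padding_size)
        factors.append(self.long_lora_scaling_factors)
        factors.append(self.bias_enabled)
        hash_str = hashlib.md5(str(factors).encode(),
                               usedforsecurity=False).hexdigest()
        return hash_str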