[Misc] Remove LoRA log (#15388)
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
@@ -2373,12 +2373,6 @@ class LoRAConfig:
|
||||
self.lora_dtype = model_config.dtype
|
||||
elif isinstance(self.lora_dtype, str):
|
||||
self.lora_dtype = getattr(torch, self.lora_dtype)
|
||||
if model_config.quantization and model_config.quantization not in [
|
||||
"awq", "gptq"
|
||||
]:
|
||||
# TODO support marlin
|
||||
logger.warning("%s quantization is not tested with LoRA yet.",
|
||||
model_config.quantization)
|
||||
|
||||
def verify_with_scheduler_config(self, scheduler_config: SchedulerConfig):
|
||||
# Reminder: Please update docs/source/features/compatibility_matrix.md
|
||||
|
||||
@@ -78,10 +78,6 @@ class PunicaWrapperGPU(PunicaWrapperBase):
|
||||
...], scale: float, **kwargs):
|
||||
"""
|
||||
Performs GEMM for multiple slices of lora_a.
|
||||
When `is_prefill` is true, it indicates that it is currently the
|
||||
prefill stage, and the `_shrink_prefill` function should be called.
|
||||
Otherwise, it is the decode stage, and the `_shrink_decode` function
|
||||
should be called.
|
||||
|
||||
Semantics:
|
||||
for i in range(len(lora_a_stacked)):
|
||||
|
||||
Reference in New Issue
Block a user