[Misc] Remove LoRA log (#15388)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
Jee Jee Li
2025-03-25 11:43:48 +08:00
committed by GitHub
parent 97cfa65df7
commit 6db94571d7
2 changed files with 5 additions and 15 deletions

View File

@@ -2373,12 +2373,6 @@ class LoRAConfig:
self.lora_dtype = model_config.dtype
elif isinstance(self.lora_dtype, str):
self.lora_dtype = getattr(torch, self.lora_dtype)
if model_config.quantization and model_config.quantization not in [
"awq", "gptq"
]:
# TODO support marlin
logger.warning("%s quantization is not tested with LoRA yet.",
model_config.quantization)
def verify_with_scheduler_config(self, scheduler_config: SchedulerConfig):
# Reminder: Please update docs/source/features/compatibility_matrix.md

View File

@@ -78,10 +78,6 @@ class PunicaWrapperGPU(PunicaWrapperBase):
...], scale: float, **kwargs):
"""
Performs GEMM for multiple slices of lora_a.
When `is_prefill is` true, it indicates that it is currently the
prefill stage, and the `_shrink_prefill` function should be called.
Otherwise, it is the decode stage, and the _shrink_decode function
should be called.
Semantics:
for i in range(len(lora_a_stacked)):