[Misc] V1 LoRA support CPU offload (#15843)
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
@@ -2434,9 +2434,9 @@ class LoRAConfig:
|
|||||||
f"max_loras ({self.max_loras})")
|
f"max_loras ({self.max_loras})")
|
||||||
|
|
||||||
def verify_with_cache_config(self, cache_config: CacheConfig):
|
def verify_with_cache_config(self, cache_config: CacheConfig):
|
||||||
# TODO LoRA supports CPU offload.
|
if cache_config.cpu_offload_gb > 0 and not envs.VLLM_USE_V1:
|
||||||
if cache_config.cpu_offload_gb > 0:
|
raise ValueError(
|
||||||
raise ValueError("CPU offload is not supported with LoRA yet.")
|
"V0 LoRA does not support CPU offload, please use V1.")
|
||||||
|
|
||||||
def verify_with_model_config(self, model_config: ModelConfig):
|
def verify_with_model_config(self, model_config: ModelConfig):
|
||||||
if self.lora_dtype in (None, "auto"):
|
if self.lora_dtype in (None, "auto"):
|
||||||
|
|||||||
Reference in New Issue
Block a user