[Misc] V1 LoRA support CPU offload (#15843)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
2025-04-02 23:04:43 +08:00
parent e86c414d6a
commit 58e234a754
1 changed file with 3 additions and 3 deletions
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -2434,9 +2434,9 @@ class LoRAConfig:
                f"max_loras ({self.max_loras})")
    def verify_with_cache_config(self, cache_config: CacheConfig):
-        # TODO LoRA supports CPU offload.
+        if cache_config.cpu_offload_gb > 0 and not envs.VLLM_USE_V1:
-        if cache_config.cpu_offload_gb > 0:
+            raise ValueError(
-            raise ValueError("CPU offload is not supported with LoRA yet.")
+                "V0 LoRA does not support CPU offload, please use V1.")
    def verify_with_model_config(self, model_config: ModelConfig):
        if self.lora_dtype in (None, "auto"):