[V0 deprecation] Remove long context LoRA (#21169)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
2025-07-19 17:15:41 +08:00
parent cf8cc32674
commit 1eaff27815
13 changed files with 35 additions and 301 deletions
--- a/vllm/lora/punica_wrapper/punica_gpu.py
+++ b/vllm/lora/punica_wrapper/punica_gpu.py
@@ -7,7 +7,7 @@ Punica: Multi-Tenant LoRA Serving.
 https://arxiv.org/abs/2310.18547
 """

-from typing import TYPE_CHECKING, Optional, Union, final
+from typing import Optional, Union, final

 import torch

@@ -21,10 +21,6 @@ if HAS_TRITON:

 from .punica_base import PunicaWrapperBase

-if TYPE_CHECKING:
-    # avoid circuit import
-    from vllm.lora.models import LongContextLoRAContext
-

@final
 class PunicaWrapperGPU(PunicaWrapperBase):
@@ -55,20 +51,13 @@ class PunicaWrapperGPU(PunicaWrapperBase):
                                                       max_num_prompts,
                                                       device=device)

-    def update_metadata(
-            self,
-            mapping: LoRAMapping,
-            lora_index_to_id: list[Optional[int]],
-            max_loras: int,
-            vocab_size: int,
-            extra_vocab_size: int,
-            long_lora_context: Optional["LongContextLoRAContext"] = None,
-            **kwargs):
+    def update_metadata(self, mapping: LoRAMapping,
+                        lora_index_to_id: list[Optional[int]], max_loras: int,
+                        vocab_size: int, extra_vocab_size: int, **kwargs):

        self.is_prefill = mapping.is_prefill
        self._update_base_metadata(mapping, lora_index_to_id, max_loras,
-                                   vocab_size, extra_vocab_size,
-                                   long_lora_context)
+                                   vocab_size, extra_vocab_size)

        # Prepare cuda kernel metadata tensors
        self.token_mapping_meta.prepare_tensors(self.token_lora_indices)