[V0 deprecation] Remove long context LoRA (#21169)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
Jee Jee Li
2025-07-19 17:15:41 +08:00
committed by GitHub
parent cf8cc32674
commit 1eaff27815
13 changed files with 35 additions and 301 deletions

View File

@@ -7,7 +7,7 @@ Punica: Multi-Tenant LoRA Serving.
https://arxiv.org/abs/2310.18547
"""
from typing import TYPE_CHECKING, Optional, Union, final
from typing import Optional, Union, final
import torch
@@ -21,10 +21,6 @@ if HAS_TRITON:
from .punica_base import PunicaWrapperBase
if TYPE_CHECKING:
# avoid circular import
from vllm.lora.models import LongContextLoRAContext
@final
class PunicaWrapperGPU(PunicaWrapperBase):
@@ -55,20 +51,13 @@ class PunicaWrapperGPU(PunicaWrapperBase):
max_num_prompts,
device=device)
def update_metadata(
self,
mapping: LoRAMapping,
lora_index_to_id: list[Optional[int]],
max_loras: int,
vocab_size: int,
extra_vocab_size: int,
long_lora_context: Optional["LongContextLoRAContext"] = None,
**kwargs):
def update_metadata(self, mapping: LoRAMapping,
lora_index_to_id: list[Optional[int]], max_loras: int,
vocab_size: int, extra_vocab_size: int, **kwargs):
self.is_prefill = mapping.is_prefill
self._update_base_metadata(mapping, lora_index_to_id, max_loras,
vocab_size, extra_vocab_size,
long_lora_context)
vocab_size, extra_vocab_size)
# Prepare cuda kernel metadata tensors
self.token_mapping_meta.prepare_tensors(self.token_lora_indices)