[V0 deprecation] Remove long context LoRA (#21169)
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
@@ -7,7 +7,7 @@ Punica: Multi-Tenant LoRA Serving.
|
||||
https://arxiv.org/abs/2310.18547
|
||||
"""
|
||||
|
||||
from typing import TYPE_CHECKING, Optional, Union, final
|
||||
from typing import Optional, Union, final
|
||||
|
||||
import torch
|
||||
|
||||
@@ -21,10 +21,6 @@ if HAS_TRITON:
|
||||
|
||||
from .punica_base import PunicaWrapperBase
|
||||
|
||||
if TYPE_CHECKING:
|
||||
# avoid circular import
|
||||
from vllm.lora.models import LongContextLoRAContext
|
||||
|
||||
|
||||
@final
|
||||
class PunicaWrapperGPU(PunicaWrapperBase):
|
||||
@@ -55,20 +51,13 @@ class PunicaWrapperGPU(PunicaWrapperBase):
|
||||
max_num_prompts,
|
||||
device=device)
|
||||
|
||||
def update_metadata(
|
||||
self,
|
||||
mapping: LoRAMapping,
|
||||
lora_index_to_id: list[Optional[int]],
|
||||
max_loras: int,
|
||||
vocab_size: int,
|
||||
extra_vocab_size: int,
|
||||
long_lora_context: Optional["LongContextLoRAContext"] = None,
|
||||
**kwargs):
|
||||
def update_metadata(self, mapping: LoRAMapping,
|
||||
lora_index_to_id: list[Optional[int]], max_loras: int,
|
||||
vocab_size: int, extra_vocab_size: int, **kwargs):
|
||||
|
||||
self.is_prefill = mapping.is_prefill
|
||||
self._update_base_metadata(mapping, lora_index_to_id, max_loras,
|
||||
vocab_size, extra_vocab_size,
|
||||
long_lora_context)
|
||||
vocab_size, extra_vocab_size)
|
||||
|
||||
# Prepare cuda kernel metadata tensors
|
||||
self.token_mapping_meta.prepare_tensors(self.token_lora_indices)
|
||||
|
||||
Reference in New Issue
Block a user