[Hardware][CPU] Multi-LoRA implementation for the CPU backend (#11100)
Signed-off-by: Akshat Tripathi <akshat@krai.ai>
Signed-off-by: Oleg Mosalov <oleg@krai.ai>
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
Co-authored-by: Oleg Mosalov <oleg@krai.ai>
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com>
Co-authored-by: Isotr0py <2037008807@qq.com>
This commit is contained in:
@@ -12,11 +12,11 @@ import torch
|
||||
from vllm.triton_utils import HAS_TRITON
|
||||
|
||||
if HAS_TRITON:
|
||||
from vllm.lora.ops.bgmv_expand import bgmv_expand
|
||||
from vllm.lora.ops.bgmv_expand_slice import bgmv_expand_slice
|
||||
from vllm.lora.ops.bgmv_shrink import bgmv_shrink
|
||||
from vllm.lora.ops.sgmv_expand import sgmv_expand
|
||||
from vllm.lora.ops.sgmv_shrink import sgmv_shrink
|
||||
from vllm.lora.ops.triton_ops import bgmv_expand
|
||||
from vllm.lora.ops.triton_ops import bgmv_expand_slice
|
||||
from vllm.lora.ops.triton_ops import bgmv_shrink
|
||||
from vllm.lora.ops.triton_ops import sgmv_expand
|
||||
from vllm.lora.ops.triton_ops import sgmv_shrink
|
||||
|
||||
from .punica_base import PunicaWrapperBase
|
||||
|
||||
|
||||
Reference in New Issue
Block a user