[Bugfix] Correct adapter usage for cohere and jamba (#8292)

This commit is contained in:
Vladislav Kruglikov
2024-09-09 21:20:46 +03:00
committed by GitHub
parent 58fcc8545a
commit f9b4a2d415
2 changed files with 6 additions and 3 deletions

View File

@@ -38,6 +38,8 @@ from vllm.sequence import IntermediateTensors
from vllm.worker.model_runner import (_BATCH_SIZES_TO_CAPTURE,
_get_graph_batch_size)
+from .interfaces import SupportsLoRA
KVCache = Tuple[torch.Tensor, torch.Tensor]
@@ -539,7 +541,7 @@ class JambaModel(nn.Module):
return hidden_states
-class JambaForCausalLM(nn.Module, HasInnerState):
+class JambaForCausalLM(nn.Module, HasInnerState, SupportsLoRA):
packed_modules_mapping = {
"qkv_proj": [
"q_proj",