[V1] Remove V0 code paths for Hybrid models (#25400)
Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com>
@@ -8,7 +8,6 @@ import torch
 import torch.nn as nn
 from transformers import Lfm2Config

-from vllm import envs
 from vllm.attention import Attention
 from vllm.compilation.decorators import support_torch_compile
 from vllm.config import CacheConfig, ModelConfig, VllmConfig
@@ -297,7 +296,6 @@ class Lfm2ShortConvDecoderLayer(nn.Module):
         self.conv(
             hidden_states,
             output,
-            conv_metadata=None,
         )
         hidden_states, residual = self.ffn_norm(output, residual)
         hidden_states = self.feed_forward(hidden_states)
@@ -459,13 +457,11 @@ class Lfm2ForCausalLM(nn.Module, HasInnerState, SupportsLoRA, SupportsPP,
     def get_mamba_state_shape_from_config(
         cls,
         vllm_config: "VllmConfig",
-        use_v1: bool = True,
     ) -> tuple[tuple[int, int]]:
         """ Calculate shapes for LFM2's convolutional cache.

         Args:
             vllm_config: vLLM config
-            use_v1: Get shapes for V1 (or V0)

         Returns:
             Tuple containing:
@@ -478,7 +474,6 @@ class Lfm2ForCausalLM(nn.Module, HasInnerState, SupportsLoRA, SupportsPP,
             tp_world_size=parallel_config.tensor_parallel_size,
             intermediate_size=hf_config.conv_dim,
             conv_kernel=hf_config.conv_L_cache,
-            use_v1=use_v1,
         )

     def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None:
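With the flag removed in the two hunks above, callers now resolve the conv-cache shape from the config alone. A minimal sketch of the post-change call, assuming the lfm2 module path and that a VllmConfig instance is already available (neither is shown in this diff):

from vllm.config import VllmConfig
from vllm.model_executor.models.lfm2 import Lfm2ForCausalLM


def lfm2_conv_state_shape(vllm_config: VllmConfig) -> tuple[tuple[int, int]]:
    # Before this commit the call also passed use_v1=True down to the shape
    # calculation; with the V0 paths gone, the config is the only input.
    return Lfm2ForCausalLM.get_mamba_state_shape_from_config(vllm_config)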
@@ -489,8 +484,6 @@ class Lfm2ForCausalLM(nn.Module, HasInnerState, SupportsLoRA, SupportsPP,
         scheduler_config = vllm_config.scheduler_config
         assert (not cache_config.enable_prefix_caching
                 ), "Lfm2 currently does not support prefix caching"
-        assert envs.VLLM_USE_V1, (
-            "Lfm2ForCausalLM doesn't support vLLM v0. Please enable v1")

         super().__init__()
         self.config = config
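The constructor no longer asserts envs.VLLM_USE_V1, since the V0 engine paths are removed for hybrid models. If an out-of-tree integration still wants an explicit guard, a minimal sketch of one is below; this helper is illustrative only and not part of vLLM's API or of this commit:

from vllm import envs


def require_v1_engine() -> None:
    # Hypothetical guard: fail early if the legacy V0 engine is selected.
    if not envs.VLLM_USE_V1:
        raise RuntimeError(
            "Hybrid models such as Lfm2ForCausalLM require the vLLM V1 engine")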