Refactor sliding window configuration to Transformers best practice (#21927)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -49,6 +49,7 @@ from vllm.model_executor.model_loader.weight_utils import (
|
||||
default_weight_loader, maybe_remap_kv_scale_name)
|
||||
from vllm.model_executor.sampling_metadata import SamplingMetadata
|
||||
from vllm.sequence import IntermediateTensors
|
||||
from vllm.transformers_utils.config import is_interleaved
|
||||
|
||||
from .interfaces import SupportsLoRA, SupportsPP
|
||||
from .utils import (AutoWeightsLoader, PPMissingLayer, extract_layer_index,
|
||||
@@ -285,8 +286,7 @@ class Qwen2Model(nn.Module):
|
||||
quant_config = vllm_config.quant_config
|
||||
|
||||
# TODO (@robertgshaw2): see if this can be moved out
|
||||
if (cache_config.sliding_window is not None
|
||||
and hasattr(config, "max_window_layers")):
|
||||
if is_interleaved(vllm_config.model_config.hf_text_config):
|
||||
assert config.max_window_layers == config.num_hidden_layers, (
|
||||
"Sliding window for some but all layers is not supported. "
|
||||
"This model uses sliding window but `max_window_layers` = {} "
|
||||
|
||||
Reference in New Issue
Block a user