[bugfix] interleaving sliding window for cohere2 model (#11583)

Signed-off-by: youkaichao <youkaichao@gmail.com>
This commit is contained in:
youkaichao
2024-12-29 00:55:42 +08:00
committed by GitHub
parent d427e5cfda
commit 328841d002
7 changed files with 206 additions and 13 deletions

View File

@@ -301,7 +301,7 @@ class ModelConfig:
sliding_window = getattr(self.hf_text_config, "sliding_window", None)
has_interleaved_attention = (sliding_window is not None) and (
isinstance(sliding_window, list) or
-(self.hf_text_config.model_type in ["gemma2"]))
+(self.hf_text_config.model_type in ["gemma2", "cohere2"]))
if (not self.disable_sliding_window and has_interleaved_attention):
if envs.VLLM_ATTENTION_BACKEND == "XFORMERS":