[Model] Support Mamba (#6484)

This commit is contained in:
Tyler Michael Smith
2024-10-11 11:40:06 -04:00
committed by GitHub
parent df3dcdf49d
commit 7342a7d7f8
29 changed files with 1603 additions and 343 deletions

View File

@@ -912,6 +912,7 @@ class EngineArgs:
gpu_memory_utilization=self.gpu_memory_utilization,
swap_space=self.swap_space,
cache_dtype=self.kv_cache_dtype,
+is_attention_free=model_config.is_attention_free,
num_gpu_blocks_override=self.num_gpu_blocks_override,
sliding_window=model_config.get_sliding_window(),
enable_prefix_caching=self.enable_prefix_caching,
@@ -945,13 +946,9 @@ class EngineArgs:
use_sliding_window = (model_config.get_sliding_window()
is not None)
use_spec_decode = self.speculative_model is not None
-has_seqlen_agnostic_layers = (
-model_config.contains_seqlen_agnostic_layers(
-parallel_config))
if (is_gpu and not use_sliding_window and not use_spec_decode
and not self.enable_lora
-and not self.enable_prompt_adapter
-and not has_seqlen_agnostic_layers):
+and not self.enable_prompt_adapter):
self.enable_chunked_prefill = True
logger.warning(
"Chunked prefill is enabled by default for models with "