[V1][Hybrid] Mamba Prefix Caching with align mode (#30877)
Signed-off-by: huanghaoyan.hhy <huanghaoyan.hhy@alibaba-inc.com> Signed-off-by: Chen Zhang <zhangch99@outlook.com> Co-authored-by: Chen Zhang <zhangch99@outlook.com>
This commit is contained in:
@@ -2128,3 +2128,7 @@ class NemotronH_Nano_VL_V2(
|
||||
temp_vllm_config = copy.deepcopy(vllm_config)
|
||||
temp_vllm_config.model_config.hf_config = text_config
|
||||
return NemotronHForCausalLM.get_mamba_state_dtype_from_config(temp_vllm_config)
|
||||
|
||||
@classmethod
|
||||
def get_mamba_state_copy_func(cls):
|
||||
return NemotronHForCausalLM.get_mamba_state_copy_func()
|
||||
|
||||
Reference in New Issue
Block a user