[Bugfix] Fix nomic max_model_len (#18755)

Author: wang.yuqi
Date: 2025-05-28 11:29:53 +08:00
Committed by: GitHub
Parent: 794ae1f551
Commit: 3e9ce609bd

4 changed files with 241 additions and 2 deletions

@@ -571,6 +571,7 @@ class ModelConfig:
             sliding_window = None
 
+        self.original_max_model_len = self.max_model_len
         self.max_model_len = _get_and_verify_max_len(
             hf_config=self.hf_text_config,
             max_model_len=self.max_model_len,
@@ -4471,6 +4472,19 @@ class VllmConfig:
         self.compilation_config.init_with_cudagraph_sizes(
             batch_size_capture_list)
 
+    def recalculate_max_model_len(self, max_model_len: int):
+        model_config = self.model_config
+        max_model_len = _get_and_verify_max_len(
+            hf_config=model_config.hf_text_config,
+            max_model_len=max_model_len,
+            disable_sliding_window=model_config.disable_sliding_window,
+            sliding_window_len=model_config.get_hf_config_sliding_window(),
+            spec_target_max_model_len=model_config.spec_target_max_model_len,
+            encoder_config=model_config.encoder_config)
+        self.model_config.max_model_len = max_model_len
+        self.scheduler_config.max_model_len = max_model_len
+        self.compute_hash()
+
     def __str__(self):
         return (
             f"model={self.model_config.model!r},"