[Fix] Add model sequence length into model config (#575)

This commit is contained in:
Zhuohan Li
2023-07-25 23:46:30 -07:00
committed by GitHub
parent 82ad323dee
commit 58a072be15
3 changed files with 27 additions and 18 deletions

View File

@@ -155,10 +155,9 @@ class EngineArgs:
     parallel_config = ParallelConfig(self.pipeline_parallel_size,
                                      self.tensor_parallel_size,
                                      self.worker_use_ray)
-    max_model_len = getattr(model_config.hf_config,
-                            'max_position_embeddings', float('inf'))
     scheduler_config = SchedulerConfig(self.max_num_batched_tokens,
-                                       self.max_num_seqs, max_model_len)
+                                       self.max_num_seqs,
+                                       model_config.get_max_model_len())
     return model_config, cache_config, parallel_config, scheduler_config