Make max_model_len configurable (#972)

This commit is contained in:
Antoni Baum
2023-09-12 16:29:19 -07:00
committed by GitHub
parent d6545ad22e
commit 0bb1e885a0
2 changed files with 22 additions and 1 deletion

View File

@@ -18,6 +18,7 @@ class EngineArgs:
load_format: str = 'auto'
dtype: str = 'auto'
seed: int = 0
max_model_len: Optional[int] = None
worker_use_ray: bool = False
pipeline_parallel_size: int = 1
tensor_parallel_size: int = 1
@@ -89,6 +90,11 @@ class EngineArgs:
'The "auto" option will use FP16 precision '
'for FP32 and FP16 models, and BF16 precision '
'for BF16 models.')
parser.add_argument('--max-model-len',
type=int,
default=None,
help='model context length. If unspecified, '
'will be automatically derived from the model.')
# Parallel arguments
parser.add_argument('--worker-use-ray',
action='store_true',
@@ -153,7 +159,7 @@ class EngineArgs:
model_config = ModelConfig(self.model, self.tokenizer,
self.tokenizer_mode, self.trust_remote_code,
self.download_dir, self.load_format,
self.dtype, self.seed)
self.dtype, self.seed, self.max_model_len)
cache_config = CacheConfig(self.block_size,
self.gpu_memory_utilization,
self.swap_space)