Push logprob generation to LLMEngine (#3065)
Co-authored-by: Avnish Narayan <avnish@anyscale.com>
This commit is contained in:
@@ -31,6 +31,7 @@ class EngineArgs:
|
||||
max_num_batched_tokens: Optional[int] = None
|
||||
max_num_seqs: int = 256
|
||||
max_paddings: int = 256
|
||||
max_logprobs: int = 5 # OpenAI default value
|
||||
disable_log_stats: bool = False
|
||||
revision: Optional[str] = None
|
||||
code_revision: Optional[str] = None
|
||||
@@ -212,6 +213,12 @@ class EngineArgs:
|
||||
type=int,
|
||||
default=EngineArgs.max_paddings,
|
||||
help='maximum number of paddings in a batch')
|
||||
parser.add_argument(
|
||||
'--max-logprobs',
|
||||
type=int,
|
||||
default=EngineArgs.max_logprobs,
|
||||
help=('max number of log probs to return logprobs is specified in'
|
||||
' SamplingParams'))
|
||||
parser.add_argument('--disable-log-stats',
|
||||
action='store_true',
|
||||
help='disable logging statistics')
|
||||
@@ -300,7 +307,8 @@ class EngineArgs:
|
||||
self.trust_remote_code, self.download_dir, self.load_format,
|
||||
self.dtype, self.seed, self.revision, self.code_revision,
|
||||
self.tokenizer_revision, self.max_model_len, self.quantization,
|
||||
self.enforce_eager, self.max_context_len_to_capture)
|
||||
self.enforce_eager, self.max_context_len_to_capture,
|
||||
self.max_logprobs)
|
||||
cache_config = CacheConfig(self.block_size,
|
||||
self.gpu_memory_utilization,
|
||||
self.swap_space, self.kv_cache_dtype,
|
||||
|
||||
Reference in New Issue
Block a user