[Misc]add configurable cuda graph size (#17201)
Signed-off-by: CXIAAAAA <cxia0209@gmail.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -231,6 +231,8 @@ class EngineArgs:
     kv_cache_dtype: CacheDType = CacheConfig.cache_dtype
     seed: Optional[int] = ModelConfig.seed
     max_model_len: Optional[int] = ModelConfig.max_model_len
+    cuda_graph_sizes: list[int] = get_field(SchedulerConfig,
+                                            "cuda_graph_sizes")
     # Note: Specifying a custom executor backend by passing a class
     # is intended for expert use only. The API may change without
     # notice.
@@ -711,6 +713,8 @@ class EngineArgs:
         scheduler_group.add_argument(
             "--max-long-partial-prefills",
             **scheduler_kwargs["max_long_partial_prefills"])
+        scheduler_group.add_argument('--cuda-graph-sizes',
+                                     **scheduler_kwargs["cuda_graph_sizes"])
         scheduler_group.add_argument(
             "--long-prefill-token-threshold",
             **scheduler_kwargs["long_prefill_token_threshold"])
@@ -1042,6 +1046,7 @@ class EngineArgs:
             max_num_batched_tokens=self.max_num_batched_tokens,
             max_num_seqs=self.max_num_seqs,
             max_model_len=model_config.max_model_len,
+            cuda_graph_sizes=self.cuda_graph_sizes,
             num_lookahead_slots=num_lookahead_slots,
             delay_factor=self.scheduler_delay_factor,
             enable_chunked_prefill=self.enable_chunked_prefill,
Reference in New Issue
Block a user