Make it easy to profile workers with nsight (#3162)
Co-authored-by: Roger Wang <136131678+ywang96@users.noreply.github.com>
This commit is contained in:
@@ -46,6 +46,7 @@ class EngineArgs:
|
||||
lora_dtype = 'auto'
|
||||
max_cpu_loras: Optional[int] = None
|
||||
device: str = 'auto'
|
||||
ray_workers_use_nsight: bool = False
|
||||
|
||||
def __post_init__(self):
|
||||
if self.tokenizer is None:
|
||||
@@ -168,6 +169,10 @@ class EngineArgs:
|
||||
help='load model sequentially in multiple batches, '
|
||||
'to avoid RAM OOM when using tensor '
|
||||
'parallel and large models')
|
||||
parser.add_argument(
|
||||
'--ray-workers-use-nsight',
|
||||
action='store_true',
|
||||
help='If specified, use nsight to profile ray workers')
|
||||
# KV cache arguments
|
||||
parser.add_argument('--block-size',
|
||||
type=int,
|
||||
@@ -305,7 +310,8 @@ class EngineArgs:
|
||||
self.tensor_parallel_size,
|
||||
self.worker_use_ray,
|
||||
self.max_parallel_loading_workers,
|
||||
self.disable_custom_all_reduce)
|
||||
self.disable_custom_all_reduce,
|
||||
self.ray_workers_use_nsight)
|
||||
scheduler_config = SchedulerConfig(self.max_num_batched_tokens,
|
||||
self.max_num_seqs,
|
||||
model_config.max_model_len,
|
||||
|
||||
Reference in New Issue
Block a user