Reduce GPU memory utilization to make sure OOM doesn't happen (#153)
This commit is contained in:
@@ -21,7 +21,7 @@ class EngineArgs:
|
|||||||
tensor_parallel_size: int = 1
|
tensor_parallel_size: int = 1
|
||||||
block_size: int = 16
|
block_size: int = 16
|
||||||
swap_space: int = 4 # GiB
|
swap_space: int = 4 # GiB
|
||||||
gpu_memory_utilization: float = 0.95
|
gpu_memory_utilization: float = 0.90
|
||||||
max_num_batched_tokens: int = 2560
|
max_num_batched_tokens: int = 2560
|
||||||
max_num_seqs: int = 256
|
max_num_seqs: int = 256
|
||||||
disable_log_stats: bool = False
|
disable_log_stats: bool = False
|
||||||
|
|||||||
Reference in New Issue
Block a user