Update vLLM Benchmark Suite for Xeon based on 0.9.2 release (#21486)
Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
This commit is contained in:
@@ -6,6 +6,7 @@
|
||||
"VLLM_RPC_TIMEOUT": 100000,
|
||||
"VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1,
|
||||
"VLLM_ENGINE_ITERATION_TIMEOUT_S": 120,
|
||||
"VLLM_CPU_SGL_KERNEL": 1,
|
||||
"VLLM_CPU_KVCACHE_SPACE": 40
|
||||
},
|
||||
"server_parameters": {
|
||||
@@ -18,6 +19,8 @@
|
||||
"disable_log_stats": "",
|
||||
"disable_log_requests": "",
|
||||
"enforce_eager": "",
|
||||
"max_num_batched_tokens": 2048,
|
||||
"max_num_seqs": 256,
|
||||
"load_format": "dummy"
|
||||
},
|
||||
"client_parameters": {
|
||||
@@ -36,6 +39,7 @@
|
||||
"VLLM_RPC_TIMEOUT": 100000,
|
||||
"VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1,
|
||||
"VLLM_ENGINE_ITERATION_TIMEOUT_S": 120,
|
||||
"VLLM_CPU_SGL_KERNEL": 1,
|
||||
"VLLM_CPU_KVCACHE_SPACE": 40
|
||||
},
|
||||
"server_parameters": {
|
||||
@@ -48,6 +52,8 @@
|
||||
"disable_log_stats": "",
|
||||
"disable_log_requests": "",
|
||||
"enforce_eager": "",
|
||||
"max_num_batched_tokens": 2048,
|
||||
"max_num_seqs": 256,
|
||||
"load_format": "dummy"
|
||||
},
|
||||
"client_parameters": {
|
||||
@@ -66,6 +72,7 @@
|
||||
"VLLM_RPC_TIMEOUT": 100000,
|
||||
"VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1,
|
||||
"VLLM_ENGINE_ITERATION_TIMEOUT_S": 120,
|
||||
"VLLM_CPU_SGL_KERNEL": 1,
|
||||
"VLLM_CPU_KVCACHE_SPACE": 40
|
||||
},
|
||||
"server_parameters": {
|
||||
@@ -78,6 +85,8 @@
|
||||
"disable_log_stats": "",
|
||||
"disable_log_requests": "",
|
||||
"enforce_eager": "",
|
||||
"max_num_batched_tokens": 2048,
|
||||
"max_num_seqs": 256,
|
||||
"load_format": "dummy"
|
||||
},
|
||||
"client_parameters": {
|
||||
@@ -96,6 +105,7 @@
|
||||
"VLLM_RPC_TIMEOUT": 100000,
|
||||
"VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1,
|
||||
"VLLM_ENGINE_ITERATION_TIMEOUT_S": 120,
|
||||
"VLLM_CPU_SGL_KERNEL": 1,
|
||||
"VLLM_CPU_KVCACHE_SPACE": 40
|
||||
},
|
||||
"server_parameters": {
|
||||
@@ -109,6 +119,8 @@
|
||||
"disable_log_stats": "",
|
||||
"disable_log_requests": "",
|
||||
"enforce_eager": "",
|
||||
"max_num_batched_tokens": 2048,
|
||||
"max_num_seqs": 256,
|
||||
"load_format": "dummy"
|
||||
},
|
||||
"client_parameters": {
|
||||
@@ -129,6 +141,7 @@
|
||||
"VLLM_RPC_TIMEOUT": 100000,
|
||||
"VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1,
|
||||
"VLLM_ENGINE_ITERATION_TIMEOUT_S": 120,
|
||||
"VLLM_CPU_SGL_KERNEL": 1,
|
||||
"VLLM_CPU_KVCACHE_SPACE": 40
|
||||
},
|
||||
"server_parameters": {
|
||||
@@ -142,6 +155,8 @@
|
||||
"disable_log_stats": "",
|
||||
"disable_log_requests": "",
|
||||
"enforce_eager": "",
|
||||
"max_num_batched_tokens": 2048,
|
||||
"max_num_seqs": 256,
|
||||
"load_format": "dummy"
|
||||
},
|
||||
"client_parameters": {
|
||||
|
||||
Reference in New Issue
Block a user