[V1] Revert the default max_num_seqs to V0 values for most hardware (#16158)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-04-08 01:54:36 +08:00
committed by GitHub
parent 027b204ff1
commit 66d433b94f
3 changed files with 6 additions and 10 deletions

View File

@@ -64,15 +64,17 @@ def test_defaults_with_usage_context():
         # For H100 and H200, we use larger default values.
         default_llm_tokens = 16384
         default_server_tokens = 8192
+        default_max_num_seqs = 1024
     else:
         default_llm_tokens = 8192
         default_server_tokens = 2048
+        default_max_num_seqs = 256
-    assert vllm_config.scheduler_config.max_num_seqs == 1024
+    assert vllm_config.scheduler_config.max_num_seqs == default_max_num_seqs
     assert vllm_config.scheduler_config.max_num_batched_tokens == default_llm_tokens  # noqa: E501
     engine_args = EngineArgs(model="facebook/opt-125m")
     vllm_config = engine_args.create_engine_config(
         UsageContext.OPENAI_API_SERVER)
-    assert vllm_config.scheduler_config.max_num_seqs == 1024
+    assert vllm_config.scheduler_config.max_num_seqs == default_max_num_seqs
     assert vllm_config.scheduler_config.max_num_batched_tokens == default_server_tokens  # noqa: E501