[V1] Revert the default max_num_seqs to V0 values for most hardware (#16158)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Author: Cyrus Leung
Date: 2025-04-08 01:54:36 +08:00
Committed by: GitHub
Parent: 027b204ff1
Commit: 66d433b94f
3 changed files with 6 additions and 10 deletions

vllm/engine/arg_utils.py

@@ -1666,12 +1666,14 @@ class EngineArgs:
                     UsageContext.LLM_CLASS: 16384,
                     UsageContext.OPENAI_API_SERVER: 8192,
                 }
+                default_max_num_seqs = 1024
             else:
                 # TODO(woosuk): Tune the default values for other hardware.
                 default_max_num_batched_tokens = {
                     UsageContext.LLM_CLASS: 8192,
                     UsageContext.OPENAI_API_SERVER: 2048,
                 }
+                default_max_num_seqs = 256

             use_context_value = usage_context.value if usage_context else None
             if (self.max_num_batched_tokens is None
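
For readers skimming the hunk: below is a minimal standalone sketch of the selection logic after this commit. The is_tpu() guard on the first branch is an assumption inferred from the commit message ("most hardware" reverts, one platform keeps the tuned value); the guard itself is not visible in this excerpt.

    from enum import Enum

    class UsageContext(Enum):
        LLM_CLASS = "LLM_CLASS"
        OPENAI_API_SERVER = "OPENAI_API_SERVER"

    def pick_defaults(is_tpu: bool, usage_context: UsageContext) -> tuple[int, int]:
        # Hypothetical standalone version of the branch above. Only the
        # TPU-style branch keeps the V1-tuned default_max_num_seqs (1024);
        # all other hardware reverts to the V0 default (256), which is
        # exactly what this commit changes.
        if is_tpu:  # assumption: the real guard is a platform check
            default_max_num_batched_tokens = {
                UsageContext.LLM_CLASS: 16384,
                UsageContext.OPENAI_API_SERVER: 8192,
            }
            default_max_num_seqs = 1024
        else:
            default_max_num_batched_tokens = {
                UsageContext.LLM_CLASS: 8192,
                UsageContext.OPENAI_API_SERVER: 2048,
            }
            default_max_num_seqs = 256
        return default_max_num_batched_tokens[usage_context], default_max_num_seqs

    print(pick_defaults(False, UsageContext.OPENAI_API_SERVER))  # -> (2048, 256)
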
@@ -1682,7 +1684,6 @@ class EngineArgs:
"Setting max_num_batched_tokens to %d for %s usage context.",
self.max_num_batched_tokens, use_context_value)
default_max_num_seqs = 1024
if self.max_num_seqs is None:
self.max_num_seqs = default_max_num_seqs
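
The second hunk moves the fallback into the branches: the flat default_max_num_seqs = 1024 assignment is deleted, so the per-hardware value chosen above is what feeds the None check. A short hedged illustration of that fallback (names reused from the diff; the standalone variables are hypothetical):

    # An explicit user value always wins; the per-hardware default applies
    # only when the argument was left unset.
    max_num_seqs = None          # user did not pass --max-num-seqs
    default_max_num_seqs = 256   # non-TPU hardware after this commit
    if max_num_seqs is None:
        max_num_seqs = default_max_num_seqs
    assert max_num_seqs == 256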