[Config] Clean up SchedulerConfig initialization (#28665)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-11-14 22:41:02 +08:00
committed by GitHub
parent 96b23b8e3b
commit 511a6b611d
9 changed files with 182 additions and 163 deletions

View File

@@ -3,7 +3,7 @@
import uuid
import warnings
from typing import Any, TypeVar
from typing import Any
import torch
@@ -39,12 +39,6 @@ def __dir__() -> list[str]:
logger = init_logger(__name__)
# This value is chosen to have a balance between ITL and TTFT. Note it is
# not optimized for throughput.
DEFAULT_MAX_NUM_BATCHED_TOKENS = 2048
POOLING_MODEL_MAX_NUM_BATCHED_TOKENS = 32768
MULTIMODAL_MODEL_MAX_NUM_BATCHED_TOKENS = 5120
# Constants related to forcing the attention backend selection
# String name of register which may be set in order to
@@ -60,9 +54,6 @@ STR_FLASH_ATTN_VAL: str = "FLASH_ATTN"
STR_INVALID_VAL: str = "INVALID"
T = TypeVar("T")
def random_uuid() -> str:
    """Return a freshly generated random UUID (version 4) as a hex string.

    The value is the ``hex`` form of the UUID: 32 lowercase hexadecimal
    characters with no dashes.
    """
    # ``UUID.hex`` is already a ``str``, so no extra conversion is needed.
    return uuid.uuid4().hex