[Misc] Make SchedulerConfig.max_model_len init-only (#28733)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-11-15 17:59:31 +08:00
committed by GitHub
parent 1ec978c209
commit 638e4196d1
17 changed files with 22 additions and 45 deletions

View File

@@ -45,8 +45,6 @@ MNK_FACTORS = [
]
vllm_config = VllmConfig(parallel_config=ParallelConfig(pipeline_parallel_size=1))
vllm_config.scheduler_config.max_num_seqs = 128
vllm_config.scheduler_config.max_model_len = 8192
def quant_fp8_per_tensor_batches(a):