[Misc] Make SchedulerConfig.max_model_len init-only (#28733)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-11-15 17:59:31 +08:00
committed by GitHub
parent 1ec978c209
commit 638e4196d1
17 changed files with 22 additions and 45 deletions

View File

@@ -29,8 +29,6 @@ if current_platform.get_device_capability() < (9, 0):
pytest.skip("FP8 Triton requires CUDA 9.0 or higher", allow_module_level=True)
# Module-level config object built with no constructor arguments; the
# scheduler settings below are then overridden by attribute assignment.
vllm_config = VllmConfig()
vllm_config.scheduler_config.max_num_seqs = 128
# NOTE(review): the commit title says SchedulerConfig.max_model_len becomes
# init-only, and the hunk header shrinks this region from 8 to 6 lines. The
# diff +/- markers are not visible in this view, so this post-init assignment
# is presumably one of the removed lines -- confirm against the raw diff.
vllm_config.scheduler_config.max_model_len = 8192
# Test configurations
# Only bfloat16 is active; the trailing comment lists the other dtypes that
# could be listed here.
DTYPES = [torch.bfloat16] # [torch.half, torch.bfloat16, torch.float32]