[Frontend] Pass API server count to each process (#23717)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-09-20 01:15:19 +08:00
committed by GitHub
parent 7ac67ea525
commit 6c117cff7d
12 changed files with 221 additions and 51 deletions

View File

@@ -11,13 +11,13 @@ from datetime import datetime
from typing import Any
import torch
import triton
from tqdm import tqdm
from vllm.model_executor.layers.quantization.utils.fp8_utils import (
_w8a8_block_fp8_matmul,
)
from vllm.platforms import current_platform
from vllm.triton_utils import triton
from vllm.utils import FlexibleArgumentParser
mp.set_start_method("spawn", force=True)