[Frontend] Expose custom args in OpenAI APIs (#16862)

Signed-off-by: Andrew Feldman <afeldman@neuralmagic.com>
Signed-off-by: Andrew Feldman <afeldman@redhat.com>
Co-authored-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
afeldman-nm
2025-06-18 20:41:11 -04:00
committed by GitHub
parent ed33349738
commit dfada85eee
3 changed files with 44 additions and 14 deletions

View File

@@ -4,12 +4,12 @@ import argparse
import itertools
import torch
import triton
from vllm import _custom_ops as ops
from vllm.model_executor.layers.fused_moe.moe_align_block_size import (
moe_align_block_size_triton,
)
from vllm.triton_utils import triton
def get_topk_ids(num_tokens: int, num_experts: int, topk: int) -> torch.Tensor: