[Frontend] Expose custom args in OpenAI APIs (#16862)

Signed-off-by: Andrew Feldman <afeldman@neuralmagic.com>
Signed-off-by: Andrew Feldman <afeldman@redhat.com>
Co-authored-by: Nick Hill <nhill@redhat.com>
2025-06-18 20:41:11 -04:00
parent ed33349738
commit dfada85eee
3 changed files with 44 additions and 14 deletions
--- a/benchmarks/kernels/benchmark_moe_align_block_size.py
+++ b/benchmarks/kernels/benchmark_moe_align_block_size.py
@@ -4,12 +4,12 @@ import argparse
 import itertools

 import torch
-import triton

 from vllm import _custom_ops as ops
 from vllm.model_executor.layers.fused_moe.moe_align_block_size import (
    moe_align_block_size_triton,
 )
+from vllm.triton_utils import triton


 def get_topk_ids(num_tokens: int, num_experts: int, topk: int) -> torch.Tensor: