[Frontend] Expose custom args in OpenAI APIs (#16862)
Signed-off-by: Andrew Feldman <afeldman@neuralmagic.com> Signed-off-by: Andrew Feldman <afeldman@redhat.com> Co-authored-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
@@ -4,12 +4,12 @@ import argparse
|
||||
import itertools
|
||||
|
||||
import torch
|
||||
import triton
|
||||
|
||||
from vllm import _custom_ops as ops
|
||||
from vllm.model_executor.layers.fused_moe.moe_align_block_size import (
|
||||
moe_align_block_size_triton,
|
||||
)
|
||||
from vllm.triton_utils import triton
|
||||
|
||||
|
||||
def get_topk_ids(num_tokens: int, num_experts: int, topk: int) -> torch.Tensor:
|
||||
|
||||
Reference in New Issue
Block a user