Signed-off-by: Bowen Bao <bowenbao@amd.com>
Co-authored-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com>
8 lines
302 B
YAML
8 lines
302 B
YAML
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

# Model identifier for this configuration.
model_name: amd/gpt-oss-20b-MoE-Quant-W-MXFP4-A-FP8-KV-FP8

# NOTE(review): presumably the minimum acceptable score for the measured
# metric -- confirm the comparison direction against the consumer.
metric_threshold: 0.568

# Reasoning-effort setting used for the run.
reasoning_effort: low

# Extra arguments supplied as a single command-line string
# (presumably appended to the server launch command -- confirm).
server_args: "--attention-backend ROCM_AITER_UNIFIED_ATTN"

# Environment variables for the run. Values are quoted so they stay
# strings rather than being parsed as integers.
env:
  VLLM_ROCM_USE_AITER: "1"